From eccd178f0b273eff440bc9cbc58e6dd74f256f14 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Thu, 31 Jul 2025 17:17:15 +0200 Subject: [PATCH] r600/sfn: Lower all GS indirect input loads after lowering IO Extend the lowering of indirect GS array inputs to include all indirect access. This replaces r600_gs_load_deref_io_to_indirect_per_vertex_input which was restricted to vec4 POS inputs. The new lowering pass works in all vayings and all types, so we can also drop the call to nir_lower_indirect_derefs. Signed-off-by: Gert Wollny Part-of: --- src/gallium/drivers/r600/sfn/sfn_nir.cpp | 186 +++-------------------- 1 file changed, 23 insertions(+), 163 deletions(-) diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/src/gallium/drivers/r600/sfn/sfn_nir.cpp index f6da5cea1d6..b8a0440d76e 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp @@ -282,16 +282,33 @@ class LowerGSArrayInput : public NirLowerInstruction { return false; auto intr = nir_instr_as_intrinsic(instr); - if (intr->intrinsic != nir_intrinsic_load_per_vertex_input) - return false; - - return nir_intrinsic_io_semantics(intr).num_slots != 1; + return intr->intrinsic == nir_intrinsic_load_per_vertex_input; } nir_def *lower(nir_instr *instr) override { auto intr = nir_instr_as_intrinsic(instr); - auto vbase = nir_r600_indirect_vertex_at_index(b, 32, intr->src[0].ssa); + + bool const_vertex_index = nir_src_as_const_value(intr->src[0]) != NULL; + + if (!const_vertex_index && !m_base_array) { + auto nvtx = b->shader->info.gs.vertices_in; + m_base_array = nir_decl_reg(b, 1, 32, nvtx); + for (int i = 0; i < nvtx; ++i) { + auto idx = nir_imm_int(b, i); + auto base = nir_r600_indirect_vertex_at_index(b, 32, idx); + nir_store_reg_indirect(b, base, m_base_array, idx); + } + } + + /* If the input indices were stored in the array for indirect access + * always use this to reduce the live range of the registers that are + * initialized with these values before the shader is run + */ + auto vbase = (const_vertex_index && !m_base_array) + ? nir_r600_indirect_vertex_at_index(b, 32, intr->src[0].ssa) + : nir_load_reg_indirect(b, 1, 32, m_base_array, intr->src[0].ssa); + ; auto new_addr = nir_iadd(b, vbase, nir_ishl(b, intr->src[1].ssa, nir_imm_int(b, 2))); auto io_semantics = nir_intrinsic_io_semantics(intr); @@ -304,6 +321,7 @@ class LowerGSArrayInput : public NirLowerInstruction { .range = nir_intrinsic_range(intr), .io_semantics = io_semantics); } + nir_def *m_base_array{nullptr}; }; } // namespace r600 @@ -671,169 +689,11 @@ r600_lower_to_scalar_instr_filter(const nir_instr *instr, const void *) } } -struct indirect_per_vertex { - nir_deref_instr *array_indirect_deref; - uint32_t mask; - nir_instr *saved_for_removal[R600_GS_VERTEX_INDIRECT_TOTAL][4]; - unsigned obsolete_deref_count; - nir_instr *obsolete_deref[32]; -}; - -static bool -r600_nir_gs_load_deref_io_to_indirect_per_vertex_input(nir_builder *b, - nir_intrinsic_instr *intrin, - void *cb_data) -{ - struct indirect_per_vertex *indirect_per_vertex = - (struct indirect_per_vertex *)cb_data; - unsigned j; - - if (intrin->intrinsic != nir_intrinsic_load_deref) - return false; - - nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); - - if (!nir_deref_mode_is_one_of(deref, nir_var_shader_in)) - return false; - - nir_variable *var = nir_deref_instr_get_variable(deref); - const bool is_arrayed = nir_is_arrayed_io(var, b->shader->info.stage); - - if (!is_arrayed || var->data.location != VARYING_SLOT_POS) - return false; - - nir_def *array_index = deref->arr.index.ssa; - - if (!array_index) - return false; - - assert(intrin->def.num_components == 4); - - nir_deref_instr *original_array = nir_instr_as_deref( - nir_instr_as_deref(intrin->src[0].ssa->parent_instr)->parent.ssa->parent_instr); - - if (!original_array || original_array->deref_type != nir_deref_type_var || - !glsl_type_is_array(original_array->type)) - return false; - - auto element_type = glsl_without_array(original_array->type); - - if (element_type != &glsl_type_builtin_vec4) - return false; - - const unsigned array_length = glsl_get_length(original_array->type); - - assert(array_length <= R600_GS_VERTEX_INDIRECT_TOTAL); - - for (j = 0; j < indirect_per_vertex->obsolete_deref_count && - j < ARRAY_SIZE(indirect_per_vertex->obsolete_deref); - j++) - if (intrin->src[0].ssa->parent_instr == indirect_per_vertex->obsolete_deref[j]) - break; - - if (j == indirect_per_vertex->obsolete_deref_count && - j != ARRAY_SIZE(indirect_per_vertex->obsolete_deref)) { - indirect_per_vertex->obsolete_deref[j] = intrin->src[0].ssa->parent_instr; - indirect_per_vertex->obsolete_deref_count++; - } - - /* The next block generates a global array which is required - * for the indirect access. This array is located at the - * beginning. All the possible elements are generated. At the - * end, the elements which are not referenced are removed. */ - if (!indirect_per_vertex->array_indirect_deref) { - static const char array_indirect_name[] = "r600_indirect_vertex_at_index"; - - b->cursor = nir_before_block(nir_start_block(b->impl)); - - nir_variable *array_indirect_var = nir_local_variable_create( - b->impl, - glsl_array_type(glsl_int_type(), R600_GS_VERTEX_INDIRECT_TOTAL, 0), - array_indirect_name); - indirect_per_vertex->array_indirect_deref = - nir_build_deref_var(b, array_indirect_var); - - for (unsigned k = 0; k < R600_GS_VERTEX_INDIRECT_TOTAL; k++) { - nir_def *build_count = nir_imm_int(b, k); - nir_deref_instr *build_array = - nir_build_deref_array(b, - indirect_per_vertex->array_indirect_deref, - build_count); - nir_def *build_store = - nir_r600_indirect_vertex_at_index(b, intrin->def.bit_size, build_count); - nir_store_deref(b, build_array, build_store, 1); - indirect_per_vertex->saved_for_removal[k][0] = build_count->parent_instr; - indirect_per_vertex->saved_for_removal[k][1] = &build_array->instr; - indirect_per_vertex->saved_for_removal[k][2] = build_store->parent_instr; - indirect_per_vertex->saved_for_removal[k][3] = - nir_instr_next(build_store->parent_instr); // nir_store_deref - } - } - - b->cursor = nir_before_instr(&intrin->instr); - - for (unsigned k = 0; k < array_length; k++) - indirect_per_vertex->mask |= (1 << k); - - nir_def *zero = nir_imm_int(b, 0); - nir_def *array_indirect_def = nir_load_deref( - b, - nir_build_deref_array(b, indirect_per_vertex->array_indirect_deref, array_index)); - nir_def *load = nir_load_r600_indirect_per_vertex_input(b, - intrin->def.num_components, - intrin->def.bit_size, - array_indirect_def, - zero); - - nir_intrinsic_set_base(nir_instr_as_intrinsic(load->parent_instr), - var->data.driver_location); - - nir_def_rewrite_uses(&intrin->def, load); - nir_instr_remove(&intrin->instr); - - return true; -} - -static bool -r600_gs_load_deref_io_to_indirect_per_vertex_input(nir_shader *shader) -{ - struct indirect_per_vertex indirect_per_vertex = {nullptr}; - bool ret = - nir_shader_intrinsics_pass(shader, - r600_nir_gs_load_deref_io_to_indirect_per_vertex_input, - nir_metadata_control_flow, - &indirect_per_vertex); - - if (indirect_per_vertex.array_indirect_deref) { - for (unsigned k = 0; k < R600_GS_VERTEX_INDIRECT_TOTAL; k++) - if ((indirect_per_vertex.mask & (1 << k)) == 0) { - nir_instr_remove(indirect_per_vertex.saved_for_removal[k][3]); - nir_instr_remove(indirect_per_vertex.saved_for_removal[k][2]); - nir_instr_remove(indirect_per_vertex.saved_for_removal[k][1]); - nir_instr_remove(indirect_per_vertex.saved_for_removal[k][0]); - } - - for (unsigned k = 0; k < indirect_per_vertex.obsolete_deref_count; k++) - nir_instr_remove(indirect_per_vertex.obsolete_deref[k]); - } - - return ret; -} - void r600_finalize_nir_common(nir_shader *nir, enum amd_gfx_level gfx_level) { const int nir_lower_flrp_mask = 16 | 32 | 64; - if (nir->info.stage == MESA_SHADER_GEOMETRY) { - NIR_PASS(_, nir, r600_gs_load_deref_io_to_indirect_per_vertex_input); - NIR_PASS(_, - nir, - nir_lower_indirect_derefs, - nir_var_shader_in, - R600_GS_VERTEX_INDIRECT_TOTAL); - } - NIR_PASS(_, nir, nir_lower_flrp, nir_lower_flrp_mask, false); nir_lower_idiv_options idiv_options = {0};