From 51de5181d1dc17008229c2c7e9abe59beb9954dd Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Thu, 31 Jul 2025 15:21:42 +0200 Subject: [PATCH] r600/sfn: Handle indirect access to GS input arrays nir_intrinsic_load_per_vertex_input has two source values. Indirect access through the first value is already handled, but accessing arrays with the second index was missing; it became relevant now that most IO optimizations are handled by nir_opt_varyings. Fixes: 37ae4df3e4d ("glsl: remove most IO optimizations that are replaced by nir_opt_varyings") Signed-off-by: Gert Wollny Part-of: (cherry picked from commit 39543f6b5cc9b77119c31daaf0954e209c5f5bde) --- .pick_status.json | 2 +- src/gallium/drivers/r600/sfn/sfn_nir.cpp | 39 +++++++++++++++++++ .../drivers/r600/sfn/sfn_shader_gs.cpp | 22 +++++++---- 3 files changed, 55 insertions(+), 8 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index b7f89a100f6..0172496f9f0 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -3624,7 +3624,7 @@ "description": "r600/sfn: Handle indirect access to GS input arrays", "nominated": true, "nomination_type": 2, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "37ae4df3e4d90a2a474e313d4a563a0bb4c00cfe", "notes": null diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/src/gallium/drivers/r600/sfn/sfn_nir.cpp index e7667ead4e7..3c20afb15f7 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp @@ -275,6 +275,36 @@ private: } }; +class LowerGSArrayInput : public NirLowerInstruction { + bool filter(const nir_instr *instr) const override + { + if (instr->type != nir_instr_type_intrinsic) + return false; + + auto intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_load_per_vertex_input) + return false; + + return nir_intrinsic_io_semantics(intr).num_slots != 1; + } + + nir_def *lower(nir_instr *instr) override + { + auto intr = nir_instr_as_intrinsic(instr); + auto vbase = 
nir_r600_indirect_vertex_at_index(b, 32, intr->src[0].ssa); + auto new_addr = + nir_iadd(b, vbase, nir_ishl(b, intr->src[1].ssa, nir_imm_int(b, 2))); + auto io_semantics = nir_intrinsic_io_semantics(intr); + return nir_load_r600_indirect_per_vertex_input(b, + intr->num_components, + intr->def.bit_size, + new_addr, + nir_imm_zero(b, 1, 32), + .base = nir_intrinsic_base(intr), + .range = nir_intrinsic_range(intr), + .io_semantics = io_semantics); + } +}; } // namespace r600 static nir_intrinsic_op @@ -367,6 +397,13 @@ r600_lower_clipvertex_to_clipdist(nir_shader *sh, pipe_stream_output_info& so_in return result; } +static bool +r600_lower_gs_input_array(nir_shader *sh) +{ + assert(sh->info.stage == MESA_SHADER_GEOMETRY); + return r600::LowerGSArrayInput().run(sh); +} + static bool r600_nir_lower_atomics(nir_shader *shader) { @@ -919,6 +956,8 @@ r600_lower_and_optimize_nir(nir_shader *sh, if (r600_is_last_vertex_stage(sh, *key)) r600_lower_clipvertex_to_clipdist(sh, *so_info); + if (sh->info.stage == MESA_SHADER_GEOMETRY) + NIR_PASS(_, sh, r600_lower_gs_input_array); if (sh->info.stage == MESA_SHADER_TESS_CTRL || sh->info.stage == MESA_SHADER_TESS_EVAL || (sh->info.stage == MESA_SHADER_VERTEX && key->vs.as_ls)) { diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp index db315cf35aa..569705778d4 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader_gs.cpp @@ -88,11 +88,8 @@ GeometryShader::process_store_output(nir_intrinsic_instr *instr) bool GeometryShader::process_load_input(nir_intrinsic_instr *instr) { - auto location = static_cast(nir_intrinsic_io_semantics(instr).location); - auto index = nir_src_as_const_value(instr->src[1]); - assert(index); - - auto driver_location = nir_intrinsic_base(instr) + index->u32; + auto location = + static_cast(nir_intrinsic_io_semantics(instr).location); if (location == VARYING_SLOT_POS || location == VARYING_SLOT_PSIZ || 
location == VARYING_SLOT_FOGC || location == VARYING_SLOT_CLIP_VERTEX || @@ -103,10 +100,21 @@ GeometryShader::process_load_input(nir_intrinsic_instr *instr) (location >= VARYING_SLOT_VAR0 && location <= VARYING_SLOT_VAR31) || (location >= VARYING_SLOT_TEX0 && location <= VARYING_SLOT_TEX7)) { - add_input_at(location, driver_location); + if (nir_intrinsic_io_semantics(instr).num_slots == 1) { + auto index = nir_src_as_const_value(instr->src[1]); + auto driver_location = nir_intrinsic_base(instr) + index->u32; + add_input_at(location, driver_location); + } else { + auto base = nir_intrinsic_base(instr); + unsigned range = nir_intrinsic_range(instr); + for (unsigned i = 0; i < range; ++i) { + auto driver_location = base + i; + auto array_location = static_cast(location + i); + add_input_at(array_location, driver_location); + } + } return true; } - return false; }