From f20cd07e21720545c7b2e2f99a745a2fa4b54034 Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Mon, 2 Feb 2026 10:39:50 +0800 Subject: [PATCH] radeonsi: fix mesh shader outputs kill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mesh shaders use store_per_vertex_output for point size and store_per_primitive_output for layer id, so handle both intrinsics when killing outputs. This fixes gpu-ratemeter running slowly in the kill-point-size and kill-layer-id cases when a mono shader is used, which is expected to kill these outputs. Also gather fragment shader per-primitive input info so that mesh shader per-primitive outputs can be killed. Fixes: e6e21dfbf23 ("radeonsi: kill outputs for mesh shader") Acked-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Marek Olšák Part-of: --- src/gallium/drivers/radeonsi/si_nir_kill_outputs.c | 5 ++++- src/gallium/drivers/radeonsi/si_shader_info.c | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/si_nir_kill_outputs.c b/src/gallium/drivers/radeonsi/si_nir_kill_outputs.c index 03922302a9e..dc49cbf62ee 100644 --- a/src/gallium/drivers/radeonsi/si_nir_kill_outputs.c +++ b/src/gallium/drivers/radeonsi/si_nir_kill_outputs.c @@ -35,7 +35,10 @@ bool si_nir_kill_outputs(nir_shader *nir, const union si_shader_key *key) continue; nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - if (intr->intrinsic != nir_intrinsic_store_output) + if (intr->intrinsic != nir_intrinsic_store_output && + /* for mesh shader outputs */ + intr->intrinsic != nir_intrinsic_store_per_vertex_output && + intr->intrinsic != nir_intrinsic_store_per_primitive_output) continue; /* No indirect indexing allowed. 
*/ diff --git a/src/gallium/drivers/radeonsi/si_shader_info.c b/src/gallium/drivers/radeonsi/si_shader_info.c index 34fcebdcdea..be5304d4a86 100644 --- a/src/gallium/drivers/radeonsi/si_shader_info.c +++ b/src/gallium/drivers/radeonsi/si_shader_info.c @@ -407,6 +407,7 @@ static void gather_instruction(const struct nir_shader *nir, struct si_shader_in break; case nir_intrinsic_load_input: case nir_intrinsic_load_per_vertex_input: + case nir_intrinsic_load_per_primitive_input: case nir_intrinsic_load_input_vertex: case nir_intrinsic_load_interpolated_input: gather_io_instrinsic(nir, info, intr, true);