From caf5a2640b1340f1985c3475c7ddb34bc1d21725 Mon Sep 17 00:00:00 2001 From: Lorenzo Rossi Date: Wed, 18 Mar 2026 17:38:54 +0100 Subject: [PATCH] panvk,panfrost: Always emit ld_var_buf when possible Previously the driver decided when the backend should use LD_VAR_BUF[_IMM] instructions based on the total number of varyings read, falling back to LD_VAR[_IMM] + descriptors when the varying index could overflow the immediate index in the instructions. That means that even adding a single varying read could overflow the index and make everything fall back to LD_VAR. With this patch the backend decides when to use LD_VAR_BUF for each varying load, reporting that decision to the driver. This helps with index overflows because only the instruction that actually overflow the immediate use the LD_VAR fallback, leaving all other instructions on the fast path. Signed-off-by: Lorenzo Rossi Reviewed-by: Faith Ekstrand Reviewed-by: Christoph Pillmayer Part-of: --- src/gallium/drivers/panfrost/pan_shader.c | 2 -- .../compiler/bifrost/bifrost_compile.c | 2 +- src/panfrost/compiler/pan_compiler.h | 14 +++++------ src/panfrost/compiler/pan_nir.h | 2 +- .../compiler/pan_nir_lower_varyings_io.c | 23 ++++++++++-------- src/panfrost/vulkan/panvk_vX_shader.c | 24 +++++++++---------- 6 files changed, 33 insertions(+), 34 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_shader.c b/src/gallium/drivers/panfrost/pan_shader.c index d19347141bf..06565c8c294 100644 --- a/src/gallium/drivers/panfrost/pan_shader.c +++ b/src/gallium/drivers/panfrost/pan_shader.c @@ -208,8 +208,6 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir, } if (dev->arch >= 9) { - inputs.valhall.use_ld_var_buf = inputs.varying_layout && - inputs.varying_layout->generic_size_B <= pan_ld_var_buf_off_size(dev->arch); /* Always enable this for GL, it avoids crashes when using unbound * resources. */ inputs.robust_descriptors = true; diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.c b/src/panfrost/compiler/bifrost/bifrost_compile.c index 033ce69e704..aa382da45bc 100644 --- a/src/panfrost/compiler/bifrost/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost/bifrost_compile.c @@ -7064,7 +7064,7 @@ bifrost_compile_shader_nir(nir_shader *nir, if (!inputs->is_blend) NIR_PASS(_, nir, pan_nir_lower_fs_inputs, inputs->gpu_id, - inputs->varying_layout, inputs->valhall.use_ld_var_buf); + inputs->varying_layout, info); } if (nir->info.stage == MESA_SHADER_VERTEX && info->vs.idvs) { diff --git a/src/panfrost/compiler/pan_compiler.h b/src/panfrost/compiler/pan_compiler.h index 4b100f596c2..cb9702678c9 100644 --- a/src/panfrost/compiler/pan_compiler.h +++ b/src/panfrost/compiler/pan_compiler.h @@ -132,13 +132,6 @@ struct pan_compile_inputs { /* In multiples of 32bit. */ uint32_t offset; } fau_consts; - - union { - struct { - /* Use LD_VAR_BUF[_IMM] instead of LD_VAR[_IMM] to load varyings. */ - bool use_ld_var_buf; - } valhall; - }; }; enum pan_varying_section { @@ -335,6 +328,13 @@ struct bifrost_shader_info { * values for flat varyings. */ bool uses_flat_shading; + + /* Whether there are any LD_VAR[_IMM] instructions that require varying + * descriptors. LD_VAR_BUF[_IMM] do not count as they only need buffer + * descriptors to work correctly. For more details check out + * docs/drivers/panfrost/varyings.rst + */ + bool uses_ld_var; }; struct midgard_shader_info { diff --git a/src/panfrost/compiler/pan_nir.h b/src/panfrost/compiler/pan_nir.h index 9e22a5d10a5..1dec76e6d51 100644 --- a/src/panfrost/compiler/pan_nir.h +++ b/src/panfrost/compiler/pan_nir.h @@ -63,7 +63,7 @@ bool pan_nir_lower_vs_outputs(nir_shader *shader, unsigned gpu_id, bool pan_nir_lower_fs_inputs(nir_shader *shader, unsigned gpu_id, const struct pan_varying_layout *varying_layout, - bool valhall_use_ld_var_buf); + struct pan_shader_info *info); bool pan_nir_lower_helper_invocation(nir_shader *shader); bool pan_nir_lower_sample_pos(nir_shader *shader); diff --git a/src/panfrost/compiler/pan_nir_lower_varyings_io.c b/src/panfrost/compiler/pan_nir_lower_varyings_io.c index 42362c2763f..13c7af177fc 100644 --- a/src/panfrost/compiler/pan_nir_lower_varyings_io.c +++ b/src/panfrost/compiler/pan_nir_lower_varyings_io.c @@ -169,7 +169,7 @@ pan_nir_lower_vs_outputs(nir_shader *shader, unsigned gpu_id, struct lower_fs_inputs_ctx { unsigned arch; const struct pan_varying_layout *varying_layout; - bool valhall_use_ld_var_buf; + struct pan_shader_info *info; }; static bool @@ -208,15 +208,17 @@ lower_fs_input_load(struct nir_builder *b, const unsigned component = nir_intrinsic_component(load); const unsigned load_comps = load->num_components + component; - nir_def *res; - if (ctx->valhall_use_ld_var_buf) { - assert(ctx->arch >= 9); - + const struct pan_varying_slot *slot = NULL; + bool use_ld_var_buf = false; + if (ctx->varying_layout && ctx->arch >= 9) { pan_varying_layout_require_layout(ctx->varying_layout); - const struct pan_varying_slot *slot = - pan_varying_layout_find_slot(ctx->varying_layout, - sem.location); + slot = pan_varying_layout_find_slot(ctx->varying_layout, sem.location); assert(slot); + use_ld_var_buf = slot->offset < pan_ld_var_buf_off_size(ctx->arch); + } + + nir_def *res; + if (use_ld_var_buf) { const nir_alu_type src_type = slot->alu_type; nir_def *offset_B = nir_imm_int(b, slot->offset); @@ -232,6 +234,7 @@ lower_fs_input_load(struct nir_builder *b, .io_semantics = sem); } } else { + ctx->info->bifrost.uses_ld_var = true; const uint32_t base = nir_intrinsic_base(load); nir_def *idx = nir_imm_int(b, base); @@ -262,12 +265,12 @@ lower_fs_input_load(struct nir_builder *b, bool pan_nir_lower_fs_inputs(nir_shader *shader, unsigned gpu_id, const struct pan_varying_layout *varying_layout, - bool valhall_use_ld_var_buf) + struct pan_shader_info *info) { const struct lower_fs_inputs_ctx ctx = { .arch = pan_arch(gpu_id), .varying_layout = varying_layout, - .valhall_use_ld_var_buf = valhall_use_ld_var_buf, + .info = info, }; return nir_shader_intrinsics_pass(shader, lower_fs_input_load, nir_metadata_control_flow, diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index f63420ceedf..598dacaaf96 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ b/src/panfrost/vulkan/panvk_vX_shader.c @@ -1405,19 +1405,6 @@ panvk_compile_shader(struct panvk_device *dev, /* VS (if known) decides the memory layout */ inputs.varying_layout = vs_varying_layout; -#if PAN_ARCH >= 9 - /* LD_VAR_BUF[_IMM] has a fixed-size offset, limiting its use when we - * can fit all of the generic varyings in the offset field. - * TODO: We could still use LD_VAR_BUF for just the fields that don't - * overflow. - */ - inputs.valhall.use_ld_var_buf = - vs_varying_layout && - vs_varying_layout->generic_size_B <= pan_ld_var_buf_off_size(PAN_ARCH); - variant->desc_info.fs_varying_attr_desc_count = - inputs.valhall.use_ld_var_buf ? 0 : nir->num_inputs; -#endif - panvk_lower_nir(dev, nir, info->set_layout_count, info->set_layouts, info->robustness, state, &variant->desc_info); @@ -1438,6 +1425,17 @@ panvk_compile_shader(struct panvk_device *dev, panvk_shader_destroy(&dev->vk, &shader->vk, pAllocator); return result; } + + #if PAN_ARCH >= 9 + /* LD_VAR_BUF[_IMM] has a fixed-size offset, when shaders overflow + * that they fall back to LD_VAR[_IMM] and require descriptors. + * TODO: We could only emit descriptors that overflow the offset, + * saving a bit of space. + */ + variant->desc_info.fs_varying_attr_desc_count = + variant->info.bifrost.uses_ld_var ? nir->num_inputs : 0; + #endif + break; }