panvk,panfrost: Always emit ld_var_buf when possible

Previously the driver decided when the backend should use
LD_VAR_BUF[_IMM] instructions based on the total number of varyings
read, falling back to LD_VAR[_IMM] + descriptors when the varying index
could overflow the immediate index in the instructions.  That means that
even adding a single varying read could overflow the index and make
everything fall back to LD_VAR.

With this patch the backend decides when to use LD_VAR_BUF for each
varying load, reporting that decision to the driver.  This helps with
index overflows because only the instructions that actually overflow the
immediate index use the LD_VAR fallback, leaving all other instructions
on the fast path.

Signed-off-by: Lorenzo Rossi <lorenzo.rossi@collabora.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40515>
This commit is contained in:
Lorenzo Rossi 2026-03-18 17:38:54 +01:00 committed by Marge Bot
parent e4e7968806
commit caf5a2640b
6 changed files with 33 additions and 34 deletions

View file

@ -208,8 +208,6 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
}
if (dev->arch >= 9) {
inputs.valhall.use_ld_var_buf = inputs.varying_layout &&
inputs.varying_layout->generic_size_B <= pan_ld_var_buf_off_size(dev->arch);
/* Always enable this for GL, it avoids crashes when using unbound
* resources. */
inputs.robust_descriptors = true;

View file

@ -7064,7 +7064,7 @@ bifrost_compile_shader_nir(nir_shader *nir,
if (!inputs->is_blend)
NIR_PASS(_, nir, pan_nir_lower_fs_inputs, inputs->gpu_id,
inputs->varying_layout, inputs->valhall.use_ld_var_buf);
inputs->varying_layout, info);
}
if (nir->info.stage == MESA_SHADER_VERTEX && info->vs.idvs) {

View file

@ -132,13 +132,6 @@ struct pan_compile_inputs {
/* In multiples of 32bit. */
uint32_t offset;
} fau_consts;
union {
struct {
/* Use LD_VAR_BUF[_IMM] instead of LD_VAR[_IMM] to load varyings. */
bool use_ld_var_buf;
} valhall;
};
};
enum pan_varying_section {
@ -335,6 +328,13 @@ struct bifrost_shader_info {
* values for flat varyings.
*/
bool uses_flat_shading;
/* Whether there are any LD_VAR[_IMM] instructions that require varying
* descriptors. LD_VAR_BUF[_IMM] do not count as they only need buffer
* descriptors to work correctly. For more details check out
* docs/drivers/panfrost/varyings.rst
*/
bool uses_ld_var;
};
struct midgard_shader_info {

View file

@ -63,7 +63,7 @@ bool pan_nir_lower_vs_outputs(nir_shader *shader, unsigned gpu_id,
bool pan_nir_lower_fs_inputs(nir_shader *shader, unsigned gpu_id,
const struct pan_varying_layout *varying_layout,
bool valhall_use_ld_var_buf);
struct pan_shader_info *info);
bool pan_nir_lower_helper_invocation(nir_shader *shader);
bool pan_nir_lower_sample_pos(nir_shader *shader);

View file

@ -169,7 +169,7 @@ pan_nir_lower_vs_outputs(nir_shader *shader, unsigned gpu_id,
struct lower_fs_inputs_ctx {
unsigned arch;
const struct pan_varying_layout *varying_layout;
bool valhall_use_ld_var_buf;
struct pan_shader_info *info;
};
static bool
@ -208,15 +208,17 @@ lower_fs_input_load(struct nir_builder *b,
const unsigned component = nir_intrinsic_component(load);
const unsigned load_comps = load->num_components + component;
nir_def *res;
if (ctx->valhall_use_ld_var_buf) {
assert(ctx->arch >= 9);
const struct pan_varying_slot *slot = NULL;
bool use_ld_var_buf = false;
if (ctx->varying_layout && ctx->arch >= 9) {
pan_varying_layout_require_layout(ctx->varying_layout);
const struct pan_varying_slot *slot =
pan_varying_layout_find_slot(ctx->varying_layout,
sem.location);
slot = pan_varying_layout_find_slot(ctx->varying_layout, sem.location);
assert(slot);
use_ld_var_buf = slot->offset < pan_ld_var_buf_off_size(ctx->arch);
}
nir_def *res;
if (use_ld_var_buf) {
const nir_alu_type src_type = slot->alu_type;
nir_def *offset_B = nir_imm_int(b, slot->offset);
@ -232,6 +234,7 @@ lower_fs_input_load(struct nir_builder *b,
.io_semantics = sem);
}
} else {
ctx->info->bifrost.uses_ld_var = true;
const uint32_t base = nir_intrinsic_base(load);
nir_def *idx = nir_imm_int(b, base);
@ -262,12 +265,12 @@ lower_fs_input_load(struct nir_builder *b,
bool
pan_nir_lower_fs_inputs(nir_shader *shader, unsigned gpu_id,
const struct pan_varying_layout *varying_layout,
bool valhall_use_ld_var_buf)
struct pan_shader_info *info)
{
const struct lower_fs_inputs_ctx ctx = {
.arch = pan_arch(gpu_id),
.varying_layout = varying_layout,
.valhall_use_ld_var_buf = valhall_use_ld_var_buf,
.info = info,
};
return nir_shader_intrinsics_pass(shader, lower_fs_input_load,
nir_metadata_control_flow,

View file

@ -1405,19 +1405,6 @@ panvk_compile_shader(struct panvk_device *dev,
/* VS (if known) decides the memory layout */
inputs.varying_layout = vs_varying_layout;
#if PAN_ARCH >= 9
/* LD_VAR_BUF[_IMM] has a fixed-size offset, limiting its use when we
* can fit all of the generic varyings in the offset field.
* TODO: We could still use LD_VAR_BUF for just the fields that don't
* overflow.
*/
inputs.valhall.use_ld_var_buf =
vs_varying_layout &&
vs_varying_layout->generic_size_B <= pan_ld_var_buf_off_size(PAN_ARCH);
variant->desc_info.fs_varying_attr_desc_count =
inputs.valhall.use_ld_var_buf ? 0 : nir->num_inputs;
#endif
panvk_lower_nir(dev, nir, info->set_layout_count, info->set_layouts,
info->robustness, state, &variant->desc_info);
@ -1438,6 +1425,17 @@ panvk_compile_shader(struct panvk_device *dev,
panvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
return result;
}
#if PAN_ARCH >= 9
/* LD_VAR_BUF[_IMM] has a fixed-size offset; when shaders overflow
 * it, they fall back to LD_VAR[_IMM] and require descriptors.
* TODO: We could only emit descriptors that overflow the offset,
* saving a bit of space.
*/
variant->desc_info.fs_varying_attr_desc_count =
variant->info.bifrost.uses_ld_var ? nir->num_inputs : 0;
#endif
break;
}