mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 20:18:12 +02:00
panvk,panfrost: Always emit ld_var_buf when possible
Previously the driver decided when the backend should use LD_VAR_BUF[_IMM] instructions based on the total number of varyings read, falling back to LD_VAR[_IMM] + descriptors when the varying index could overflow the immediate index in the instructions. That means that even adding a single varying read could overflow the index and make everything fall back to LD_VAR. With this patch the backend decides when to use LD_VAR_BUF for each varying load, reporting that decision to the driver. This helps with index overflows because only the instruction that actually overflow the immediate use the LD_VAR fallback, leaving all other instructions on the fast path. Signed-off-by: Lorenzo Rossi <lorenzo.rossi@collabora.com> Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com> Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40515>
This commit is contained in:
parent
e4e7968806
commit
caf5a2640b
6 changed files with 33 additions and 34 deletions
|
|
@ -208,8 +208,6 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
|
|||
}
|
||||
|
||||
if (dev->arch >= 9) {
|
||||
inputs.valhall.use_ld_var_buf = inputs.varying_layout &&
|
||||
inputs.varying_layout->generic_size_B <= pan_ld_var_buf_off_size(dev->arch);
|
||||
/* Always enable this for GL, it avoids crashes when using unbound
|
||||
* resources. */
|
||||
inputs.robust_descriptors = true;
|
||||
|
|
|
|||
|
|
@ -7064,7 +7064,7 @@ bifrost_compile_shader_nir(nir_shader *nir,
|
|||
|
||||
if (!inputs->is_blend)
|
||||
NIR_PASS(_, nir, pan_nir_lower_fs_inputs, inputs->gpu_id,
|
||||
inputs->varying_layout, inputs->valhall.use_ld_var_buf);
|
||||
inputs->varying_layout, info);
|
||||
}
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX && info->vs.idvs) {
|
||||
|
|
|
|||
|
|
@ -132,13 +132,6 @@ struct pan_compile_inputs {
|
|||
/* In multiples of 32bit. */
|
||||
uint32_t offset;
|
||||
} fau_consts;
|
||||
|
||||
union {
|
||||
struct {
|
||||
/* Use LD_VAR_BUF[_IMM] instead of LD_VAR[_IMM] to load varyings. */
|
||||
bool use_ld_var_buf;
|
||||
} valhall;
|
||||
};
|
||||
};
|
||||
|
||||
enum pan_varying_section {
|
||||
|
|
@ -335,6 +328,13 @@ struct bifrost_shader_info {
|
|||
* values for flat varyings.
|
||||
*/
|
||||
bool uses_flat_shading;
|
||||
|
||||
/* Whether there are any LD_VAR[_IMM] instructions that require varying
|
||||
* descriptors. LD_VAR_BUF[_IMM] do not count as they only need buffer
|
||||
* descriptors to work correctly. For more details check out
|
||||
* docs/drivers/panfrost/varyings.rst
|
||||
*/
|
||||
bool uses_ld_var;
|
||||
};
|
||||
|
||||
struct midgard_shader_info {
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ bool pan_nir_lower_vs_outputs(nir_shader *shader, unsigned gpu_id,
|
|||
|
||||
bool pan_nir_lower_fs_inputs(nir_shader *shader, unsigned gpu_id,
|
||||
const struct pan_varying_layout *varying_layout,
|
||||
bool valhall_use_ld_var_buf);
|
||||
struct pan_shader_info *info);
|
||||
|
||||
bool pan_nir_lower_helper_invocation(nir_shader *shader);
|
||||
bool pan_nir_lower_sample_pos(nir_shader *shader);
|
||||
|
|
|
|||
|
|
@ -169,7 +169,7 @@ pan_nir_lower_vs_outputs(nir_shader *shader, unsigned gpu_id,
|
|||
struct lower_fs_inputs_ctx {
|
||||
unsigned arch;
|
||||
const struct pan_varying_layout *varying_layout;
|
||||
bool valhall_use_ld_var_buf;
|
||||
struct pan_shader_info *info;
|
||||
};
|
||||
|
||||
static bool
|
||||
|
|
@ -208,15 +208,17 @@ lower_fs_input_load(struct nir_builder *b,
|
|||
const unsigned component = nir_intrinsic_component(load);
|
||||
const unsigned load_comps = load->num_components + component;
|
||||
|
||||
nir_def *res;
|
||||
if (ctx->valhall_use_ld_var_buf) {
|
||||
assert(ctx->arch >= 9);
|
||||
|
||||
const struct pan_varying_slot *slot = NULL;
|
||||
bool use_ld_var_buf = false;
|
||||
if (ctx->varying_layout && ctx->arch >= 9) {
|
||||
pan_varying_layout_require_layout(ctx->varying_layout);
|
||||
const struct pan_varying_slot *slot =
|
||||
pan_varying_layout_find_slot(ctx->varying_layout,
|
||||
sem.location);
|
||||
slot = pan_varying_layout_find_slot(ctx->varying_layout, sem.location);
|
||||
assert(slot);
|
||||
use_ld_var_buf = slot->offset < pan_ld_var_buf_off_size(ctx->arch);
|
||||
}
|
||||
|
||||
nir_def *res;
|
||||
if (use_ld_var_buf) {
|
||||
const nir_alu_type src_type = slot->alu_type;
|
||||
nir_def *offset_B = nir_imm_int(b, slot->offset);
|
||||
|
||||
|
|
@ -232,6 +234,7 @@ lower_fs_input_load(struct nir_builder *b,
|
|||
.io_semantics = sem);
|
||||
}
|
||||
} else {
|
||||
ctx->info->bifrost.uses_ld_var = true;
|
||||
const uint32_t base = nir_intrinsic_base(load);
|
||||
nir_def *idx = nir_imm_int(b, base);
|
||||
|
||||
|
|
@ -262,12 +265,12 @@ lower_fs_input_load(struct nir_builder *b,
|
|||
bool
|
||||
pan_nir_lower_fs_inputs(nir_shader *shader, unsigned gpu_id,
|
||||
const struct pan_varying_layout *varying_layout,
|
||||
bool valhall_use_ld_var_buf)
|
||||
struct pan_shader_info *info)
|
||||
{
|
||||
const struct lower_fs_inputs_ctx ctx = {
|
||||
.arch = pan_arch(gpu_id),
|
||||
.varying_layout = varying_layout,
|
||||
.valhall_use_ld_var_buf = valhall_use_ld_var_buf,
|
||||
.info = info,
|
||||
};
|
||||
return nir_shader_intrinsics_pass(shader, lower_fs_input_load,
|
||||
nir_metadata_control_flow,
|
||||
|
|
|
|||
|
|
@ -1405,19 +1405,6 @@ panvk_compile_shader(struct panvk_device *dev,
|
|||
/* VS (if known) decides the memory layout */
|
||||
inputs.varying_layout = vs_varying_layout;
|
||||
|
||||
#if PAN_ARCH >= 9
|
||||
/* LD_VAR_BUF[_IMM] has a fixed-size offset, limiting its use when we
|
||||
* can fit all of the generic varyings in the offset field.
|
||||
* TODO: We could still use LD_VAR_BUF for just the fields that don't
|
||||
* overflow.
|
||||
*/
|
||||
inputs.valhall.use_ld_var_buf =
|
||||
vs_varying_layout &&
|
||||
vs_varying_layout->generic_size_B <= pan_ld_var_buf_off_size(PAN_ARCH);
|
||||
variant->desc_info.fs_varying_attr_desc_count =
|
||||
inputs.valhall.use_ld_var_buf ? 0 : nir->num_inputs;
|
||||
#endif
|
||||
|
||||
panvk_lower_nir(dev, nir, info->set_layout_count, info->set_layouts,
|
||||
info->robustness, state, &variant->desc_info);
|
||||
|
||||
|
|
@ -1438,6 +1425,17 @@ panvk_compile_shader(struct panvk_device *dev,
|
|||
panvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
|
||||
return result;
|
||||
}
|
||||
|
||||
#if PAN_ARCH >= 9
|
||||
/* LD_VAR_BUF[_IMM] has a fixed-size offset, when shaders overflow
|
||||
* that they fall back to LD_VAR[_IMM] and require descriptors.
|
||||
* TODO: We could only emit descriptors that overflow the offset,
|
||||
* saving a bit of space.
|
||||
*/
|
||||
variant->desc_info.fs_varying_attr_desc_count =
|
||||
variant->info.bifrost.uses_ld_var ? nir->num_inputs : 0;
|
||||
#endif
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue