panvk: Limit AD allocation to max var loads in v9+

Introduce a varying load count pass to get the maximum number of varying
loads from a fragment shader (prior to optimization passes), in order to
only allocate as many Attribute Descriptors (ADs) as required. This will
generally lead to smaller buffers in SRT0 for fragment shaders.

As the number of ADs is now dynamic based on the shader, we need to
lower varying loads early for fragment shaders in v9+, as the number of
ADs determines the offset of dummy_sampler, which is required during
nir_lower_descriptors.

Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32969>
This commit is contained in:
Lars-Ivar Hesselberg Simonsen 2025-01-20 10:59:10 +01:00 committed by Marge Bot
parent 6d5ae5b3af
commit de86641d3f
4 changed files with 25 additions and 5 deletions

View file

@ -239,7 +239,9 @@ prepare_fs_driver_set(struct panvk_cmd_buffer *cmdbuf)
const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader; const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader;
const struct panvk_descriptor_state *desc_state = const struct panvk_descriptor_state *desc_state =
&cmdbuf->state.gfx.desc_state; &cmdbuf->state.gfx.desc_state;
uint32_t desc_count = fs->desc_info.dyn_bufs.count + MAX_VARYING + 1; uint32_t num_varying_attr_descs = fs->desc_info.max_varying_loads;
uint32_t desc_count =
fs->desc_info.dyn_bufs.count + num_varying_attr_descs + 1;
struct panfrost_ptr driver_set = panvk_cmd_alloc_dev_mem( struct panfrost_ptr driver_set = panvk_cmd_alloc_dev_mem(
cmdbuf, desc, desc_count * PANVK_DESCRIPTOR_SIZE, PANVK_DESCRIPTOR_SIZE); cmdbuf, desc, desc_count * PANVK_DESCRIPTOR_SIZE, PANVK_DESCRIPTOR_SIZE);
struct panvk_opaque_desc *descs = driver_set.cpu; struct panvk_opaque_desc *descs = driver_set.cpu;
@ -250,12 +252,13 @@ prepare_fs_driver_set(struct panvk_cmd_buffer *cmdbuf)
emit_varying_descs(cmdbuf, (struct mali_attribute_packed *)(&descs[0])); emit_varying_descs(cmdbuf, (struct mali_attribute_packed *)(&descs[0]));
/* Dummy sampler always comes right after the varyings. */ /* Dummy sampler always comes right after the varyings. */
pan_cast_and_pack(&descs[MAX_VARYING], SAMPLER, cfg) { pan_cast_and_pack(&descs[num_varying_attr_descs], SAMPLER, cfg) {
cfg.clamp_integer_array_indices = false; cfg.clamp_integer_array_indices = false;
} }
panvk_per_arch(cmd_fill_dyn_bufs)( panvk_per_arch(cmd_fill_dyn_bufs)(
desc_state, fs, (struct mali_buffer_packed *)(&descs[1 + MAX_VARYING])); desc_state, fs,
(struct mali_buffer_packed *)(&descs[num_varying_attr_descs + 1]));
fs_desc_state->driver_set.dev_addr = driver_set.gpu; fs_desc_state->driver_set.dev_addr = driver_set.gpu;
fs_desc_state->driver_set.size = desc_count * PANVK_DESCRIPTOR_SIZE; fs_desc_state->driver_set.size = desc_count * PANVK_DESCRIPTOR_SIZE;

View file

@ -277,6 +277,7 @@ struct panvk_shader {
uint32_t map[MAX_DYNAMIC_BUFFERS]; uint32_t map[MAX_DYNAMIC_BUFFERS];
uint32_t count; uint32_t count;
} dyn_bufs; } dyn_bufs;
uint32_t max_varying_loads;
#endif #endif
} desc_info; } desc_info;

View file

@ -61,6 +61,7 @@ struct panvk_shader_desc_info {
uint32_t dummy_sampler_handle; uint32_t dummy_sampler_handle;
uint32_t dyn_bufs_start; uint32_t dyn_bufs_start;
struct panvk_shader_desc_map dyn_bufs; struct panvk_shader_desc_map dyn_bufs;
uint32_t num_varying_attr_descs;
#endif #endif
}; };
@ -1033,7 +1034,7 @@ create_copy_table(nir_shader *nir, struct lower_desc_ctx *ctx)
break; break;
case MESA_SHADER_FRAGMENT: case MESA_SHADER_FRAGMENT:
/* Dummy sampler comes after the varyings. */ /* Dummy sampler comes after the varyings. */
dummy_sampler_idx = MAX_VARYING; dummy_sampler_idx = desc_info->num_varying_attr_descs;
break; break;
case MESA_SHADER_COMPUTE: case MESA_SHADER_COMPUTE:
dummy_sampler_idx = 0; dummy_sampler_idx = 0;
@ -1256,6 +1257,9 @@ panvk_per_arch(nir_lower_descriptors)(
if (!progress) if (!progress)
goto out; goto out;
#if PAN_ARCH >= 9
ctx.desc_info.num_varying_attr_descs = shader->desc_info.max_varying_loads;
#endif
create_copy_table(nir, &ctx); create_copy_table(nir, &ctx);
upload_shader_desc_info(dev, shader, &ctx.desc_info); upload_shader_desc_info(dev, shader, &ctx.desc_info);

View file

@ -650,6 +650,17 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir,
} }
#endif #endif
/* Lower input intrinsics for fragment shaders early to get the max
* number of varying loads, as this number is required during descriptor
* lowering for v9+. */
if (stage == MESA_SHADER_FRAGMENT) {
nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
stage);
#if PAN_ARCH >= 9
shader->desc_info.max_varying_loads = nir->num_inputs;
#endif
}
panvk_per_arch(nir_lower_descriptors)(nir, dev, rs, set_layout_count, panvk_per_arch(nir_lower_descriptors)(nir, dev, rs, set_layout_count,
set_layouts, shader); set_layouts, shader);
@ -706,7 +717,8 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir,
var->data.location <= VERT_ATTRIB_GENERIC15); var->data.location <= VERT_ATTRIB_GENERIC15);
var->data.driver_location = var->data.location - VERT_ATTRIB_GENERIC0; var->data.driver_location = var->data.location - VERT_ATTRIB_GENERIC0;
} }
} else { } else if (stage != MESA_SHADER_FRAGMENT) {
/* Input varyings in fragment shader have been lowered early. */
nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
stage); stage);
} }