panfrost: enable EXT_shader_pixel_local_storage

Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Eric R. Smith <eric.smith@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37110>
This commit is contained in:
Ryan Mckeever 2025-10-03 17:11:04 -07:00 committed by Marge Bot
parent c15a43cce0
commit 298ad17b81
12 changed files with 68 additions and 0 deletions

View file

@ -336,6 +336,7 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
GL_EXT_semaphore_win32 DONE (zink, d3d12)
GL_EXT_shader_clock DONE (all drivers that support GL_ARB_shader_clock)
GL_EXT_shader_group_vote DONE (all drivers that support GL_ARB_shader_group_vote)
GL_EXT_shader_pixel_local_storage DONE (panfrost/v6+)
GL_EXT_shader_realtime_clock DONE (panfrost/v6+)
GL_EXT_sRGB_write_control DONE (all drivers that support GLES 3.0+)
GL_EXT_texture_compression_astc_decode_mode DONE (panfrost)

View file

@ -11,3 +11,4 @@ VK_EXT_shader_uniform_buffer_unsized_array on NVK, RADV
VK_EXT_device_memory_report on panvk
VK_VALVE_video_encode_rgb_conversion on radv
VK_EXT_custom_resolve on RADV
GL_EXT_shader_pixel_local_storage on Panfrost v6+

View file

@ -4459,6 +4459,17 @@ emit_write_timestamp(struct panfrost_batch *batch,
JOBX(emit_write_timestamp)(batch, dst, offset);
}
/* Screen vtable hook wrapping pan_blend_get_internal_desc().
 *
 * All four arguments are forwarded unchanged to the per-arch
 * pan_blend_get_internal_desc() helper and only the upper 32 bits of its
 * result are returned. When built for PAN_ARCH below 6 the helper is not
 * called and 0 is returned instead.
 */
static uint64_t
get_conv_desc(enum pipe_format fmt, unsigned rt,
              unsigned force_size, bool dithered)
{
#if PAN_ARCH < 6
   /* No internal blend descriptor is queried on these architectures. */
   return 0;
#else
   /* Keep only the high word of the internal descriptor. */
   return GENX(pan_blend_get_internal_desc)(fmt, rt, force_size, dithered) >> 32;
#endif
}
void
GENX(panfrost_cmdstream_screen_init)(struct panfrost_screen *screen)
{
@ -4479,6 +4490,7 @@ GENX(panfrost_cmdstream_screen_init)(struct panfrost_screen *screen)
screen->vtbl.mtk_detile = panfrost_mtk_detile_compute;
screen->vtbl.emit_write_timestamp = emit_write_timestamp;
screen->vtbl.select_tile_size = GENX(pan_select_tile_size);
screen->vtbl.get_conv_desc = get_conv_desc;
pan_blend_shader_cache_init(&dev->blend_shaders, panfrost_device_gpu_id(dev),
dev->kmod.props.gpu_variant,

View file

@ -232,6 +232,10 @@ panfrost_fs_required(struct panfrost_compiled_shader *fs,
if (PAN_ARCH <= 5 && zsa->base.alpha_func != PIPE_FUNC_ALWAYS)
return true;
/* If pixel local storage is enabled we need to execute */
if (state->pls_enabled)
return true;
/* If colour is written we need to execute */
for (unsigned i = 0; i < state->nr_cbufs; ++i) {
if (state->cbufs[i].texture && blend->info[i].enabled)

View file

@ -495,6 +495,7 @@ panfrost_batch_to_fb_info(const struct panfrost_batch *batch,
fb->nr_samples = util_framebuffer_get_num_samples(&batch->key);
fb->force_samples = (batch->line_smoothing == U_TRISTATE_YES) ? 16 : 0;
fb->rt_count = batch->key.nr_cbufs;
fb->pls_enabled = batch->key.pls_enabled;
fb->sprite_coord_origin = (batch->sprite_coord_origin == U_TRISTATE_YES);
fb->first_provoking_vertex =
(batch->first_provoking_vertex == U_TRISTATE_YES);

View file

@ -731,6 +731,10 @@ panfrost_init_screen_caps(struct panfrost_screen *screen)
caps->shader_realtime_clock = dev->arch >= 6 &&
dev->kmod.props.gpu_can_query_timestamp;
/* Pixel local storage is initially supported on Bifrost and Valhall only */
caps->shader_pixel_local_storage_fast_size =
caps->shader_pixel_local_storage_size = (dev->arch >= 6) ? 16 : 0;
caps->vs_instanceid = true;
caps->texture_multisample = true;
caps->surface_sample_count = true;

View file

@ -110,6 +110,10 @@ struct panfrost_vtable {
/* Run a compute shader to detile an MTK 16L32 image */
void (*mtk_detile)(struct panfrost_context *ctx, struct pipe_blit_info *info);
/* Construct a render target blend descriptor */
uint64_t (*get_conv_desc)(enum pipe_format fmt, unsigned rt,
unsigned force_size, bool dithered);
};
struct panfrost_screen {

View file

@ -145,6 +145,7 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
struct pan_compile_inputs inputs = {
.gpu_id = panfrost_device_gpu_id(dev),
.gpu_variant = dev->kmod.props.gpu_variant,
.get_conv_desc = screen->vtbl.get_conv_desc,
};
/* Lower this early so the backends don't have to worry about it */
@ -502,6 +503,42 @@ panfrost_create_shader_state(struct pipe_context *pctx,
so->stream_output = cso->stream_output;
so->nir = nir;
/* PLS lowering is not taken care of by glsl_to_nir(), so do it here. */
if (nir->info.stage == MESA_SHADER_FRAGMENT &&
nir->info.fs.accesses_pixel_local_storage) {
/* Try to optimize the case where inout PLS vars are never
* read/written to. Needs to be called before
* nir_lower_io_vars_to_temporaries() because the copy_derefs
* inserted there prevent us from detecting PLS usage.
*/
NIR_PASS(_, nir, nir_downgrade_pls_vars);
/* Lower PLS vars to temporaries before we lower IOs. */
NIR_PASS(_, nir, nir_lower_io_vars_to_temporaries,
nir_shader_get_entrypoint(nir), nir_var_any_pixel_local);
/* We need to lower all the copy_derefs introduced by
 * nir_lower_io_vars_to_temporaries() before calling nir_lower_io.
 */
NIR_PASS(_, nir, nir_split_var_copies);
NIR_PASS(_, nir, nir_lower_var_copies);
NIR_PASS(_, nir, nir_lower_global_vars_to_local);
/* Lower all PLS IOs. */
NIR_PASS(_, nir, nir_lower_io, nir_var_any_pixel_local, glsl_type_size,
0);
/* Lower and remove dead derefs and variables to clean up the IR. */
NIR_PASS(_, nir, nir_lower_vars_to_ssa);
NIR_PASS(_, nir, nir_opt_dce);
NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
/* Re-run gather_info() to get the latest accesses_pixel_local_storage
* state.
*/
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
}
/* gl_FragColor needs to be lowered before lowering I/O, do that now */
if (nir->info.stage == MESA_SHADER_FRAGMENT &&
nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {

View file

@ -46,6 +46,7 @@ GL_EXT_shader_framebuffer_fetch_non_coherent
GL_EXT_shader_implicit_conversions
GL_EXT_shader_integer_mix
GL_EXT_shader_io_blocks
GL_EXT_shader_pixel_local_storage
GL_EXT_shader_realtime_clock
GL_EXT_shadow_samplers
GL_EXT_sRGB

View file

@ -45,6 +45,7 @@ GL_EXT_shader_framebuffer_fetch_non_coherent
GL_EXT_shader_implicit_conversions
GL_EXT_shader_integer_mix
GL_EXT_shader_io_blocks
GL_EXT_shader_pixel_local_storage
GL_EXT_shader_realtime_clock
GL_EXT_shadow_samplers
GL_EXT_sRGB

View file

@ -45,6 +45,7 @@ GL_EXT_shader_framebuffer_fetch_non_coherent
GL_EXT_shader_implicit_conversions
GL_EXT_shader_integer_mix
GL_EXT_shader_io_blocks
GL_EXT_shader_pixel_local_storage
GL_EXT_shader_realtime_clock
GL_EXT_shadow_samplers
GL_EXT_sRGB

View file

@ -46,6 +46,7 @@ GL_EXT_shader_framebuffer_fetch_non_coherent
GL_EXT_shader_implicit_conversions
GL_EXT_shader_integer_mix
GL_EXT_shader_io_blocks
GL_EXT_shader_pixel_local_storage
GL_EXT_shader_realtime_clock
GL_EXT_shadow_samplers
GL_EXT_sRGB