mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 07:20:10 +01:00
panfrost: enable EXT_shader_pixel_local_storage
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Eric R. Smith <eric.smith@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37110>
This commit is contained in:
parent
c15a43cce0
commit
298ad17b81
12 changed files with 68 additions and 0 deletions
|
|
@ -336,6 +336,7 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
|
|||
GL_EXT_semaphore_win32 DONE (zink, d3d12)
|
||||
GL_EXT_shader_clock DONE (all drivers that support GL_ARB_shader_clock)
|
||||
GL_EXT_shader_group_vote DONE (all drivers that support GL_ARB_shader_group_vote)
|
||||
GL_EXT_shader_pixel_local_storage DONE (panfrost/v6+)
|
||||
GL_EXT_shader_realtime_clock DONE (panfrost/v6+)
|
||||
GL_EXT_sRGB_write_control DONE (all drivers that support GLES 3.0+)
|
||||
GL_EXT_texture_compression_astc_decode_mode DONE (panfrost)
|
||||
|
|
|
|||
|
|
@ -11,3 +11,4 @@ VK_EXT_shader_uniform_buffer_unsized_array on NVK, RADV
|
|||
VK_EXT_device_memory_report on panvk
|
||||
VK_VALVE_video_encode_rgb_conversion on radv
|
||||
VK_EXT_custom_resolve on RADV
|
||||
GL_EXT_shader_pixel_local_storage on Panfrost v6+
|
||||
|
|
|
|||
|
|
@ -4459,6 +4459,17 @@ emit_write_timestamp(struct panfrost_batch *batch,
|
|||
JOBX(emit_write_timestamp)(batch, dst, offset);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
get_conv_desc(enum pipe_format fmt, unsigned rt,
|
||||
unsigned force_size, bool dithered)
|
||||
{
|
||||
#if PAN_ARCH >= 6
|
||||
return GENX(pan_blend_get_internal_desc)(fmt, rt, force_size, dithered) >> 32;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
GENX(panfrost_cmdstream_screen_init)(struct panfrost_screen *screen)
|
||||
{
|
||||
|
|
@ -4479,6 +4490,7 @@ GENX(panfrost_cmdstream_screen_init)(struct panfrost_screen *screen)
|
|||
screen->vtbl.mtk_detile = panfrost_mtk_detile_compute;
|
||||
screen->vtbl.emit_write_timestamp = emit_write_timestamp;
|
||||
screen->vtbl.select_tile_size = GENX(pan_select_tile_size);
|
||||
screen->vtbl.get_conv_desc = get_conv_desc;
|
||||
|
||||
pan_blend_shader_cache_init(&dev->blend_shaders, panfrost_device_gpu_id(dev),
|
||||
dev->kmod.props.gpu_variant,
|
||||
|
|
|
|||
|
|
@ -232,6 +232,10 @@ panfrost_fs_required(struct panfrost_compiled_shader *fs,
|
|||
if (PAN_ARCH <= 5 && zsa->base.alpha_func != PIPE_FUNC_ALWAYS)
|
||||
return true;
|
||||
|
||||
/* If pixel local storage is enabled we need to execute */
|
||||
if (state->pls_enabled)
|
||||
return true;
|
||||
|
||||
/* If colour is written we need to execute */
|
||||
for (unsigned i = 0; i < state->nr_cbufs; ++i) {
|
||||
if (state->cbufs[i].texture && blend->info[i].enabled)
|
||||
|
|
|
|||
|
|
@ -495,6 +495,7 @@ panfrost_batch_to_fb_info(const struct panfrost_batch *batch,
|
|||
fb->nr_samples = util_framebuffer_get_num_samples(&batch->key);
|
||||
fb->force_samples = (batch->line_smoothing == U_TRISTATE_YES) ? 16 : 0;
|
||||
fb->rt_count = batch->key.nr_cbufs;
|
||||
fb->pls_enabled = batch->key.pls_enabled;
|
||||
fb->sprite_coord_origin = (batch->sprite_coord_origin == U_TRISTATE_YES);
|
||||
fb->first_provoking_vertex =
|
||||
(batch->first_provoking_vertex == U_TRISTATE_YES);
|
||||
|
|
|
|||
|
|
@ -731,6 +731,10 @@ panfrost_init_screen_caps(struct panfrost_screen *screen)
|
|||
caps->shader_realtime_clock = dev->arch >= 6 &&
|
||||
dev->kmod.props.gpu_can_query_timestamp;
|
||||
|
||||
/* Pixel local storage is only exposed on v6+ (Bifrost and Valhall) for now */
|
||||
caps->shader_pixel_local_storage_fast_size =
|
||||
caps->shader_pixel_local_storage_size = (dev->arch >= 6) ? 16 : 0;
|
||||
|
||||
caps->vs_instanceid = true;
|
||||
caps->texture_multisample = true;
|
||||
caps->surface_sample_count = true;
|
||||
|
|
|
|||
|
|
@ -110,6 +110,10 @@ struct panfrost_vtable {
|
|||
|
||||
/* Run a compute shader to detile an MTK 16L32 image */
|
||||
void (*mtk_detile)(struct panfrost_context *ctx, struct pipe_blit_info *info);
|
||||
|
||||
/* construct a render target blend descriptor */
|
||||
uint64_t (*get_conv_desc)(enum pipe_format fmt, unsigned rt,
|
||||
unsigned force_size, bool dithered);
|
||||
};
|
||||
|
||||
struct panfrost_screen {
|
||||
|
|
|
|||
|
|
@ -145,6 +145,7 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
|
|||
struct pan_compile_inputs inputs = {
|
||||
.gpu_id = panfrost_device_gpu_id(dev),
|
||||
.gpu_variant = dev->kmod.props.gpu_variant,
|
||||
.get_conv_desc = screen->vtbl.get_conv_desc,
|
||||
};
|
||||
|
||||
/* Lower this early so the backends don't have to worry about it */
|
||||
|
|
@ -502,6 +503,42 @@ panfrost_create_shader_state(struct pipe_context *pctx,
|
|||
so->stream_output = cso->stream_output;
|
||||
so->nir = nir;
|
||||
|
||||
/* PLS lowering is not taken care of by glsl_to_nir(), so do it here. */
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT &&
|
||||
nir->info.fs.accesses_pixel_local_storage) {
|
||||
/* Try to optimize the case where inout PLS vars are never
|
||||
* read/written to. Needs to be called before
|
||||
* nir_lower_io_vars_to_temporaries() because the copy_derefs
|
||||
* inserted there prevent us from detecting PLS usage.
|
||||
*/
|
||||
NIR_PASS(_, nir, nir_downgrade_pls_vars);
|
||||
|
||||
/* Lower PLS vars to temporaries before we lower IOs. */
|
||||
NIR_PASS(_, nir, nir_lower_io_vars_to_temporaries,
|
||||
nir_shader_get_entrypoint(nir), nir_var_any_pixel_local);
|
||||
|
||||
/* We need to lower all the copy_derefs introduced by
|
||||
* nir_lower_io_vars_to_temporaries() before calling nir_lower_io.
|
||||
*/
|
||||
NIR_PASS(_, nir, nir_split_var_copies);
|
||||
NIR_PASS(_, nir, nir_lower_var_copies);
|
||||
NIR_PASS(_, nir, nir_lower_global_vars_to_local);
|
||||
|
||||
/* Lower all PLS IOs. */
|
||||
NIR_PASS(_, nir, nir_lower_io, nir_var_any_pixel_local, glsl_type_size,
|
||||
0);
|
||||
|
||||
/* Lower and remove dead derefs and variables to clean up the IR. */
|
||||
NIR_PASS(_, nir, nir_lower_vars_to_ssa);
|
||||
NIR_PASS(_, nir, nir_opt_dce);
|
||||
NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
|
||||
|
||||
/* Re-run gather_info() to get the latest accesses_pixel_local_storage
|
||||
* state.
|
||||
*/
|
||||
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
|
||||
}
|
||||
|
||||
/* gl_FragColor needs to be lowered before lowering I/O, do that now */
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT &&
|
||||
nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ GL_EXT_shader_framebuffer_fetch_non_coherent
|
|||
GL_EXT_shader_implicit_conversions
|
||||
GL_EXT_shader_integer_mix
|
||||
GL_EXT_shader_io_blocks
|
||||
GL_EXT_shader_pixel_local_storage
|
||||
GL_EXT_shader_realtime_clock
|
||||
GL_EXT_shadow_samplers
|
||||
GL_EXT_sRGB
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ GL_EXT_shader_framebuffer_fetch_non_coherent
|
|||
GL_EXT_shader_implicit_conversions
|
||||
GL_EXT_shader_integer_mix
|
||||
GL_EXT_shader_io_blocks
|
||||
GL_EXT_shader_pixel_local_storage
|
||||
GL_EXT_shader_realtime_clock
|
||||
GL_EXT_shadow_samplers
|
||||
GL_EXT_sRGB
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ GL_EXT_shader_framebuffer_fetch_non_coherent
|
|||
GL_EXT_shader_implicit_conversions
|
||||
GL_EXT_shader_integer_mix
|
||||
GL_EXT_shader_io_blocks
|
||||
GL_EXT_shader_pixel_local_storage
|
||||
GL_EXT_shader_realtime_clock
|
||||
GL_EXT_shadow_samplers
|
||||
GL_EXT_sRGB
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ GL_EXT_shader_framebuffer_fetch_non_coherent
|
|||
GL_EXT_shader_implicit_conversions
|
||||
GL_EXT_shader_integer_mix
|
||||
GL_EXT_shader_io_blocks
|
||||
GL_EXT_shader_pixel_local_storage
|
||||
GL_EXT_shader_realtime_clock
|
||||
GL_EXT_shadow_samplers
|
||||
GL_EXT_sRGB
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue