diff --git a/docs/features.txt b/docs/features.txt index 208f6cab2cc..9d335141383 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -336,6 +336,7 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve GL_EXT_semaphore_win32 DONE (zink, d3d12) GL_EXT_shader_clock DONE (all drivers that support GL_ARB_shader_clock) GL_EXT_shader_group_vote DONE (all drivers that support GL_ARB_shader_group_vote) + GL_EXT_shader_pixel_local_storage DONE (panfrost/v6+) GL_EXT_shader_realtime_clock DONE (panfrost/v6+) GL_EXT_sRGB_write_control DONE (all drivers that support GLES 3.0+) GL_EXT_texture_compression_astc_decode_mode DONE (panfrost) diff --git a/docs/relnotes/new_features.txt b/docs/relnotes/new_features.txt index c8263738c50..afee361f87b 100644 --- a/docs/relnotes/new_features.txt +++ b/docs/relnotes/new_features.txt @@ -11,3 +11,4 @@ VK_EXT_shader_uniform_buffer_unsized_array on NVK, RADV VK_EXT_device_memory_report on panvk VK_VALVE_video_encode_rgb_conversion on radv VK_EXT_custom_resolve on RADV +GL_EXT_shader_pixel_local_storage on Panfrost v6+ diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index c530cd21325..0224bbb9e10 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -4459,6 +4459,17 @@ emit_write_timestamp(struct panfrost_batch *batch, JOBX(emit_write_timestamp)(batch, dst, offset); } +static uint64_t +get_conv_desc(enum pipe_format fmt, unsigned rt, + unsigned force_size, bool dithered) +{ +#if PAN_ARCH >= 6 + return GENX(pan_blend_get_internal_desc)(fmt, rt, force_size, dithered) >> 32; +#else + return 0; +#endif +} + void GENX(panfrost_cmdstream_screen_init)(struct panfrost_screen *screen) { @@ -4479,6 +4490,7 @@ GENX(panfrost_cmdstream_screen_init)(struct panfrost_screen *screen) screen->vtbl.mtk_detile = panfrost_mtk_detile_compute; screen->vtbl.emit_write_timestamp = emit_write_timestamp; 
screen->vtbl.select_tile_size = GENX(pan_select_tile_size); + screen->vtbl.get_conv_desc = get_conv_desc; pan_blend_shader_cache_init(&dev->blend_shaders, panfrost_device_gpu_id(dev), dev->kmod.props.gpu_variant, diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.h b/src/gallium/drivers/panfrost/pan_cmdstream.h index 65be2b38212..627f53e9193 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.h +++ b/src/gallium/drivers/panfrost/pan_cmdstream.h @@ -232,6 +232,10 @@ panfrost_fs_required(struct panfrost_compiled_shader *fs, if (PAN_ARCH <= 5 && zsa->base.alpha_func != PIPE_FUNC_ALWAYS) return true; + /* If pixel local storage is enabled we need to execute */ + if (state->pls_enabled) + return true; + /* If colour is written we need to execute */ for (unsigned i = 0; i < state->nr_cbufs; ++i) { if (state->cbufs[i].texture && blend->info[i].enabled) diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c index c81ee68bab4..487148c0a10 100644 --- a/src/gallium/drivers/panfrost/pan_job.c +++ b/src/gallium/drivers/panfrost/pan_job.c @@ -495,6 +495,7 @@ panfrost_batch_to_fb_info(const struct panfrost_batch *batch, fb->nr_samples = util_framebuffer_get_num_samples(&batch->key); fb->force_samples = (batch->line_smoothing == U_TRISTATE_YES) ? 
16 : 0; fb->rt_count = batch->key.nr_cbufs; + fb->pls_enabled = batch->key.pls_enabled; fb->sprite_coord_origin = (batch->sprite_coord_origin == U_TRISTATE_YES); fb->first_provoking_vertex = (batch->first_provoking_vertex == U_TRISTATE_YES); diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c index b1d15887b87..08aafac6c6d 100644 --- a/src/gallium/drivers/panfrost/pan_screen.c +++ b/src/gallium/drivers/panfrost/pan_screen.c @@ -731,6 +731,10 @@ panfrost_init_screen_caps(struct panfrost_screen *screen) caps->shader_realtime_clock = dev->arch >= 6 && dev->kmod.props.gpu_can_query_timestamp; + /* Pixel local storage is initially for Bifrost and Valhall only (arch >= 6) */ + caps->shader_pixel_local_storage_fast_size = + caps->shader_pixel_local_storage_size = (dev->arch >= 6) ? 16 : 0; + caps->vs_instanceid = true; caps->texture_multisample = true; caps->surface_sample_count = true; diff --git a/src/gallium/drivers/panfrost/pan_screen.h b/src/gallium/drivers/panfrost/pan_screen.h index e1c17c65f5e..e14a3b8cb9c 100644 --- a/src/gallium/drivers/panfrost/pan_screen.h +++ b/src/gallium/drivers/panfrost/pan_screen.h @@ -110,6 +110,10 @@ struct panfrost_vtable { /* Run a compute shader to detile an MTK 16L32 image */ void (*mtk_detile)(struct panfrost_context *ctx, struct pipe_blit_info *info); + + /* Render target internal blend descriptor >> 32; returns 0 before v6 */ + uint64_t (*get_conv_desc)(enum pipe_format fmt, unsigned rt, + unsigned force_size, bool dithered); }; struct panfrost_screen { diff --git a/src/gallium/drivers/panfrost/pan_shader.c b/src/gallium/drivers/panfrost/pan_shader.c index 0c05e15ef47..b3fbf8511b4 100644 --- a/src/gallium/drivers/panfrost/pan_shader.c +++ b/src/gallium/drivers/panfrost/pan_shader.c @@ -145,6 +145,7 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir, struct pan_compile_inputs inputs = { .gpu_id = panfrost_device_gpu_id(dev), .gpu_variant = dev->kmod.props.gpu_variant, + .get_conv_desc = 
screen->vtbl.get_conv_desc, }; /* Lower this early so the backends don't have to worry about it */ @@ -502,6 +503,42 @@ panfrost_create_shader_state(struct pipe_context *pctx, so->stream_output = cso->stream_output; so->nir = nir; + /* PLS lowering is not taken care of by glsl_to_nir(), so do it here. */ + if (nir->info.stage == MESA_SHADER_FRAGMENT && + nir->info.fs.accesses_pixel_local_storage) { + /* Try to optimize the case where inout PLS vars are never + * read/written to. Needs to be called before + * nir_lower_io_vars_to_temporaries() because the copy_derefs + * inserted there prevent us from detecting PLS usage. + */ + NIR_PASS(_, nir, nir_downgrade_pls_vars); + + /* Lower PLS vars to temporaries before we lower IOs. */ + NIR_PASS(_, nir, nir_lower_io_vars_to_temporaries, + nir_shader_get_entrypoint(nir), nir_var_any_pixel_local); + + /* We need to lower all the copy_derefs introduced by + * nir_lower_io_vars_to_temporaries() before calling nir_lower_io(). + */ + NIR_PASS(_, nir, nir_split_var_copies); + NIR_PASS(_, nir, nir_lower_var_copies); + NIR_PASS(_, nir, nir_lower_global_vars_to_local); + + /* Lower all PLS IOs. */ + NIR_PASS(_, nir, nir_lower_io, nir_var_any_pixel_local, glsl_type_size, + 0); + + /* Lower and remove dead derefs and variables to clean up the IR. */ + NIR_PASS(_, nir, nir_lower_vars_to_ssa); + NIR_PASS(_, nir, nir_opt_dce); + NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL); + + /* Re-run gather_info() to get the latest accesses_pixel_local_storage + * state. 
+ */ + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + } + /* gl_FragColor needs to be lowered before lowering I/O, do that now */ if (nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) { diff --git a/src/panfrost/ci/panfrost-g52-gles2-extensions.txt b/src/panfrost/ci/panfrost-g52-gles2-extensions.txt index 9c6ddf0f0a7..028a8cd19d5 100644 --- a/src/panfrost/ci/panfrost-g52-gles2-extensions.txt +++ b/src/panfrost/ci/panfrost-g52-gles2-extensions.txt @@ -46,6 +46,7 @@ GL_EXT_shader_framebuffer_fetch_non_coherent GL_EXT_shader_implicit_conversions GL_EXT_shader_integer_mix GL_EXT_shader_io_blocks +GL_EXT_shader_pixel_local_storage GL_EXT_shader_realtime_clock GL_EXT_shadow_samplers GL_EXT_sRGB diff --git a/src/panfrost/ci/panfrost-g57-gles2-extensions.txt b/src/panfrost/ci/panfrost-g57-gles2-extensions.txt index ad03ddf655a..c684b25453f 100644 --- a/src/panfrost/ci/panfrost-g57-gles2-extensions.txt +++ b/src/panfrost/ci/panfrost-g57-gles2-extensions.txt @@ -45,6 +45,7 @@ GL_EXT_shader_framebuffer_fetch_non_coherent GL_EXT_shader_implicit_conversions GL_EXT_shader_integer_mix GL_EXT_shader_io_blocks +GL_EXT_shader_pixel_local_storage GL_EXT_shader_realtime_clock GL_EXT_shadow_samplers GL_EXT_sRGB diff --git a/src/panfrost/ci/panfrost-g610-gles2-extensions.txt b/src/panfrost/ci/panfrost-g610-gles2-extensions.txt index ad03ddf655a..c684b25453f 100644 --- a/src/panfrost/ci/panfrost-g610-gles2-extensions.txt +++ b/src/panfrost/ci/panfrost-g610-gles2-extensions.txt @@ -45,6 +45,7 @@ GL_EXT_shader_framebuffer_fetch_non_coherent GL_EXT_shader_implicit_conversions GL_EXT_shader_integer_mix GL_EXT_shader_io_blocks +GL_EXT_shader_pixel_local_storage GL_EXT_shader_realtime_clock GL_EXT_shadow_samplers GL_EXT_sRGB diff --git a/src/panfrost/ci/panfrost-g72-gles2-extensions.txt b/src/panfrost/ci/panfrost-g72-gles2-extensions.txt index 9c6ddf0f0a7..028a8cd19d5 100644 --- 
a/src/panfrost/ci/panfrost-g72-gles2-extensions.txt +++ b/src/panfrost/ci/panfrost-g72-gles2-extensions.txt @@ -46,6 +46,7 @@ GL_EXT_shader_framebuffer_fetch_non_coherent GL_EXT_shader_implicit_conversions GL_EXT_shader_integer_mix GL_EXT_shader_io_blocks +GL_EXT_shader_pixel_local_storage GL_EXT_shader_realtime_clock GL_EXT_shadow_samplers GL_EXT_sRGB