anv: fix self dependency computation

Some upcoming changes in the runtime will make it impossible to rely
on the pipeline or runtime information to know whether a fragment
shader has input attachments.

Instead we gather that information at compile time and store it in our
shader bind_map.

At runtime we check whether the fragment shader has input attachments
and whether those map to the runtime depth/stencil input attachments,
in order to set 3DSTATE_PS_EXTRA::PixelShaderKillsPixel.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fixes: d2f7b6d5a7 ("anv: implement VK_KHR_dynamic_rendering_local_read")
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32540>
(cherry picked from commit e321c438dc)
This commit is contained in:
Lionel Landwerlin 2025-04-08 21:06:30 +03:00 committed by Eric Engestrom
parent fc46313072
commit 499324de9b
7 changed files with 50 additions and 18 deletions

View file

@ -2314,7 +2314,7 @@
"description": "anv: fix self dependency computation",
"nominated": true,
"nomination_type": 2,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "d2f7b6d5a7625f916baa9a9ec110a1799c045f49",
"notes": null

View file

@ -47,6 +47,8 @@ enum binding_property {
struct apply_pipeline_layout_state {
const struct anv_physical_device *pdevice;
struct anv_pipeline_bind_map *bind_map;
const struct anv_pipeline_sets_layout *layout;
nir_address_format desc_addr_format;
nir_address_format ssbo_addr_format;
@ -211,6 +213,18 @@ add_tex_src_binding(struct apply_pipeline_layout_state *state,
struct anv_binding_apply_layout *layout =
add_deref_src_binding(state, tex->src[deref_src_idx].src);
/* Track input attachments use */
nir_variable *var =
nir_deref_instr_get_variable(
nir_src_as_deref(tex->src[deref_src_idx].src));
if (var->data.fb_fetch_output) {
assert(var->data.index == NIR_VARIABLE_NO_INDEX ||
var->data.index < MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS);
const uint32_t index = var->data.index == NIR_VARIABLE_NO_INDEX ?
MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS : var->data.index;
BITSET_SET(state->bind_map->input_attachments, index);
}
/* This is likely fallout from Wa_14020375314 but hasn't been fully
* understood by HW people yet.
*
@ -2214,6 +2228,7 @@ anv_nir_apply_pipeline_layout(nir_shader *shader,
struct apply_pipeline_layout_state state = {
.pdevice = pdevice,
.layout = layout,
.bind_map = map,
.desc_addr_format = bindless_stage ?
nir_address_format_64bit_global_32bit_offset :
nir_address_format_32bit_index_offset,

View file

@ -920,7 +920,6 @@ get_features(const struct anv_physical_device *pdevice,
#define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS 64
#define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64
#define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS 256
static VkDeviceSize
anx_get_physical_device_max_heap_size(const struct anv_physical_device *pdevice)

View file

@ -448,6 +448,10 @@ anv_shader_bin_create(struct anv_device *device,
bind_map->embedded_sampler_count);
shader->bind_map.embedded_sampler_to_binding = embedded_sampler_to_binding;
typed_memcpy(shader->bind_map.input_attachments,
bind_map->input_attachments,
ARRAY_SIZE(bind_map->input_attachments));
typed_memcpy(kernel_args, bind_map->kernel_args,
bind_map->kernel_arg_count);
shader->bind_map.kernel_args = kernel_args;
@ -524,6 +528,8 @@ anv_shader_bin_serialize(struct vk_pipeline_cache_object *object,
blob_write_bytes(blob, shader->bind_map.embedded_sampler_to_binding,
shader->bind_map.embedded_sampler_count *
sizeof(*shader->bind_map.embedded_sampler_to_binding));
blob_write_bytes(blob, shader->bind_map.input_attachments,
sizeof(shader->bind_map.input_attachments));
blob_write_bytes(blob, shader->bind_map.kernel_args,
shader->bind_map.kernel_arg_count *
sizeof(*shader->bind_map.kernel_args));
@ -600,6 +606,8 @@ anv_shader_bin_deserialize(struct vk_pipeline_cache *cache,
bind_map.embedded_sampler_to_binding = (void *)
blob_read_bytes(blob, bind_map.embedded_sampler_count *
sizeof(*bind_map.embedded_sampler_to_binding));
blob_copy_bytes(blob, bind_map.input_attachments,
sizeof(bind_map.input_attachments));
bind_map.kernel_args = (void *)
blob_read_bytes(blob, bind_map.kernel_arg_count *
sizeof(*bind_map.kernel_args));

View file

@ -173,6 +173,7 @@ struct intel_perf_query_result;
#define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32
#define MAX_EMBEDDED_SAMPLERS 2048
#define MAX_CUSTOM_BORDER_COLORS 4096
#define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS 256
/* We need 16 for UBO block reads to work and 32 for push UBOs. However, we
* use 64 here to avoid cache issues. This could most likely bring it back to
* 32 if we had different virtual addresses for the different views on a given
@ -4651,6 +4652,7 @@ struct anv_pipeline_bind_map {
struct anv_pipeline_binding * surface_to_descriptor;
struct anv_pipeline_binding * sampler_to_descriptor;
struct anv_pipeline_embedded_sampler_binding* embedded_sampler_to_binding;
BITSET_DECLARE(input_attachments, MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS + 1);
struct brw_kernel_arg_desc * kernel_args;
struct anv_push_range push_ranges[4];
@ -4897,8 +4899,6 @@ struct anv_graphics_pipeline {
uint32_t view_mask;
uint32_t instance_multiplier;
bool rp_has_ds_self_dep;
bool kill_pixel;
bool uses_xfb;
bool sample_shading_enable;

View file

@ -222,12 +222,24 @@ static const uint32_t vk_to_intel_shading_rate_combiner_op[] = {
#endif
static bool
has_ds_feedback_loop(const struct vk_dynamic_graphics_state *dyn)
has_ds_feedback_loop(const struct anv_pipeline_bind_map *bind_map,
const struct vk_dynamic_graphics_state *dyn)
{
return (dyn->feedback_loops & (VK_IMAGE_ASPECT_DEPTH_BIT |
VK_IMAGE_ASPECT_STENCIL_BIT)) ||
dyn->ial.depth_att != MESA_VK_ATTACHMENT_UNUSED ||
dyn->ial.stencil_att != MESA_VK_ATTACHMENT_UNUSED;
if (BITSET_IS_EMPTY(bind_map->input_attachments))
return false;
const unsigned depth_att = dyn->ial.depth_att == MESA_VK_ATTACHMENT_NO_INDEX ?
MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS : dyn->ial.depth_att;
const unsigned stencil_att = dyn->ial.stencil_att == MESA_VK_ATTACHMENT_NO_INDEX ?
MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS : dyn->ial.stencil_att;
return
(dyn->feedback_loops & (VK_IMAGE_ASPECT_DEPTH_BIT |
VK_IMAGE_ASPECT_STENCIL_BIT)) != 0 ||
(dyn->ial.depth_att != MESA_VK_ATTACHMENT_UNUSED &&
BITSET_TEST(bind_map->input_attachments, depth_att)) ||
(dyn->ial.stencil_att != MESA_VK_ATTACHMENT_UNUSED &&
BITSET_TEST(bind_map->input_attachments, stencil_att));
}
UNUSED static bool
@ -343,9 +355,10 @@ want_stencil_pma_fix(const struct vk_dynamic_graphics_state *dyn,
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
* (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF)
*/
struct anv_shader_bin *fs_bin = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
return pipeline->kill_pixel ||
pipeline->rp_has_ds_self_dep ||
has_ds_feedback_loop(dyn) ||
has_ds_feedback_loop(&fs_bin->bind_map, dyn) ||
wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
}
@ -873,12 +886,13 @@ update_ps_extra_kills_pixel(struct anv_gfx_dynamic_state *hw_state,
const struct anv_cmd_graphics_state *gfx,
const struct anv_graphics_pipeline *pipeline)
{
struct anv_shader_bin *fs_bin = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
SET_STAGE(PS_EXTRA, ps_extra.PixelShaderKillsPixel,
wm_prog_data && (pipeline->rp_has_ds_self_dep ||
has_ds_feedback_loop(dyn) ||
wm_prog_data->uses_kill),
wm_prog_data &&
(has_ds_feedback_loop(&fs_bin->bind_map, dyn) ||
wm_prog_data->uses_kill),
FRAGMENT);
}

View file

@ -1696,11 +1696,7 @@ compute_kill_pixel(struct anv_graphics_pipeline *pipeline,
* 3DSTATE_PS_BLEND::AlphaTestEnable since Vulkan doesn't have a concept
* of an alpha test.
*/
pipeline->rp_has_ds_self_dep =
(state->pipeline_flags &
VK_PIPELINE_CREATE_2_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT) != 0;
pipeline->kill_pixel =
pipeline->rp_has_ds_self_dep ||
wm_prog_data->uses_kill ||
wm_prog_data->uses_omask ||
(ms && ms->alpha_to_coverage_enable);