From 3aba64c1684c7e516c0ff3ac401ea2b83c126c97 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 29 Apr 2026 17:38:47 +0200 Subject: [PATCH] radv/meta: fix expanding HTILE on compute with multisampling The expand was considering only the first sample, very old bug. This fixes test_{copy,compute}_queue_depth_stencil_msaa from vkd3d-proton on GFX11-GFX11.7 GPUs. Older GPUs don't support image stores with depth/stencil MSAA images. Cc: mesa-stable Signed-off-by: Samuel Pitoiset (cherry picked from commit 608bc0e5930ec2c75511eb818d787d8c1432e8e6) Part-of: --- .pick_status.json | 2 +- src/amd/vulkan/meta/radv_meta_decompress.c | 31 +++++++++++++++++----- src/amd/vulkan/nir/radv_meta_nir.c | 20 +++++++++----- src/amd/vulkan/nir/radv_meta_nir.h | 2 +- 4 files changed, 40 insertions(+), 15 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 6cd309f9077..f61873ef592 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -2244,7 +2244,7 @@ "description": "radv/meta: fix expanding HTILE on compute with multisampling", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/amd/vulkan/meta/radv_meta_decompress.c b/src/amd/vulkan/meta/radv_meta_decompress.c index 40fecc643e7..4bac3c54af4 100644 --- a/src/amd/vulkan/meta/radv_meta_decompress.c +++ b/src/amd/vulkan/meta/radv_meta_decompress.c @@ -260,10 +260,9 @@ radv_process_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image } static VkResult -get_pipeline_cs(struct radv_device *device, VkPipeline *pipeline_out, VkPipelineLayout *layout_out) +get_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout_out) { enum radv_meta_object_key_type key = RADV_META_OBJECT_KEY_HTILE_EXPAND_CS; - VkResult result; const VkDescriptorSetLayoutBinding bindings[] = { { @@ -288,18 +287,38 @@ get_pipeline_cs(struct radv_device *device, VkPipeline *pipeline_out, VkPipeline .pBindings = bindings, }; - result = vk_meta_get_pipeline_layout(&device->vk, &device->meta_state.device, &desc_info, NULL, &key, sizeof(key), - layout_out); + return vk_meta_get_pipeline_layout(&device->vk, &device->meta_state.device, &desc_info, NULL, &key, sizeof(key), + layout_out); +} + +struct radv_htile_expand_cs_key { + enum radv_meta_object_key_type type; + uint8_t samples; +}; + +static VkResult +get_pipeline_cs(struct radv_device *device, const struct radv_image *image, VkPipeline *pipeline_out, + VkPipelineLayout *layout_out) +{ + const uint32_t samples = image->vk.samples; + struct radv_htile_expand_cs_key key; + VkResult result; + + result = get_pipeline_layout(device, layout_out); if (result != VK_SUCCESS) return result; + memset(&key, 0, sizeof(key)); + key.type = RADV_META_OBJECT_KEY_HTILE_EXPAND_CS; + key.samples = samples; + VkPipeline pipeline_from_cache = vk_meta_lookup_pipeline(&device->meta_state.device, &key, sizeof(key)); if (pipeline_from_cache != VK_NULL_HANDLE) { *pipeline_out = pipeline_from_cache; return VK_SUCCESS; } - nir_shader *cs = radv_meta_nir_build_expand_depth_stencil_compute_shader(); + nir_shader *cs = radv_meta_nir_build_expand_depth_stencil_compute_shader(samples); const VkPipelineShaderStageCreateInfo stage_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, @@ -336,7 +355,7 @@ radv_expand_depth_stencil_compute(struct radv_cmd_buffer *cmd_buffer, struct rad assert(radv_tc_compat_htile_enabled(image, subresourceRange->baseMipLevel)); - result = get_pipeline_cs(device, &pipeline, &layout); + result = get_pipeline_cs(device, image, &pipeline, &layout); if (result != VK_SUCCESS) { vk_command_buffer_set_error(&cmd_buffer->vk, result); return; diff --git a/src/amd/vulkan/nir/radv_meta_nir.c b/src/amd/vulkan/nir/radv_meta_nir.c index a4c4ac47769..a3fc3f32035 100644 --- a/src/amd/vulkan/nir/radv_meta_nir.c +++ b/src/amd/vulkan/nir/radv_meta_nir.c @@ -1008,9 +1008,10 @@ radv_meta_nir_build_dcc_retile_compute_shader(enum amd_gfx_level gfx_level, uint } nir_shader * -radv_meta_nir_build_expand_depth_stencil_compute_shader() +radv_meta_nir_build_expand_depth_stencil_compute_shader(uint8_t samples) { - const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT); + const enum glsl_sampler_dim dim = samples > 1 ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D; + const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT); nir_builder b = radv_meta_nir_init_shader(MESA_SHADER_COMPUTE, "expand_depth_stencil_compute"); @@ -1032,9 +1033,11 @@ radv_meta_nir_build_expand_depth_stencil_compute_shader() nir_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id); - nir_def *data = nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->def, global_id, - nir_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D, - .dest_type = nir_type_uint32); + nir_def *data[8]; + for (uint32_t i = 0; i < samples; i++) { + data[i] = nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->def, global_id, nir_imm_int(&b, i), + nir_imm_int(&b, 0), .image_dim = dim, .dest_type = nir_type_uint32); + } /* We need a SCOPE_DEVICE memory_scope because ACO will avoid * creating a vmcnt(0) because it expects the L1 cache to keep memory @@ -1043,8 +1046,11 @@ radv_meta_nir_build_expand_depth_stencil_compute_shader() nir_barrier(&b, .execution_scope = SCOPE_WORKGROUP, .memory_scope = SCOPE_DEVICE, .memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo); - nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, global_id, nir_undef(&b, 1, 32), data, - nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D); + for (uint32_t i = 0; i < samples; i++) { + nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, global_id, nir_imm_int(&b, i), data[i], + nir_imm_int(&b, 0), .image_dim = dim); + } + return b.shader; } diff --git a/src/amd/vulkan/nir/radv_meta_nir.h b/src/amd/vulkan/nir/radv_meta_nir.h index 6d3372ad93f..5e358a23b49 100644 --- a/src/amd/vulkan/nir/radv_meta_nir.h +++ b/src/amd/vulkan/nir/radv_meta_nir.h @@ -74,7 +74,7 @@ nir_shader *radv_meta_nir_build_copy_vrs_htile_shader(enum amd_gfx_level gfx_lev nir_shader *radv_meta_nir_build_dcc_retile_compute_shader(enum amd_gfx_level gfx_level, uint32_t gb_addr_config, const struct radeon_surf *surf); -nir_shader *radv_meta_nir_build_expand_depth_stencil_compute_shader(void); +nir_shader *radv_meta_nir_build_expand_depth_stencil_compute_shader(uint8_t samples); nir_shader *radv_meta_nir_build_dcc_decompress_compute_shader(void);