radv/meta: fix expanding HTILE on compute with multisampling

The expand only considered the first sample, which is a very old bug.

This fixes test_{copy,compute}_queue_depth_stencil_msaa from vkd3d-proton on GFX11-GFX11.7 GPUs. Older GPUs don't support image stores with depth/stencil MSAA images.

Cc: mesa-stable
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41267>
2026-05-07 11:28:05 +02:00 · 2026-04-29 17:38:47 +02:00 · 2026-04-29 17:38:47 +02:00 · 608bc0e593
commit 608bc0e593
parent 207aa9eba4
4 changed files with 39 additions and 16 deletions
--- a/src/amd/ci/radv-navi31-vkd3d-fails.txt
+++ b/src/amd/ci/radv-navi31-vkd3d-fails.txt
@@ -1,2 +0,0 @@
-test_compute_queue_depth_stencil_msaa,Fail
-test_copy_queue_depth_stencil_msaa,Fail
--- a/src/amd/vulkan/meta/radv_meta_decompress.c
+++ b/src/amd/vulkan/meta/radv_meta_decompress.c
@@ -261,10 +261,9 @@ radv_process_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image
 }

 static VkResult
-get_pipeline_cs(struct radv_device *device, VkPipeline *pipeline_out, VkPipelineLayout *layout_out)
+get_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout_out)
 {
   enum radv_meta_object_key_type key = RADV_META_OBJECT_KEY_HTILE_EXPAND_CS;
-   VkResult result;

   const VkDescriptorSetLayoutBinding bindings[] = {
      {
@@ -289,18 +288,38 @@ get_pipeline_cs(struct radv_device *device, VkPipeline *pipeline_out, VkPipeline
      .pBindings = bindings,
   };

-   result = vk_meta_get_pipeline_layout(&device->vk, &device->meta_state.device, &desc_info, NULL, &key, sizeof(key),
-                                        layout_out);
+   return vk_meta_get_pipeline_layout(&device->vk, &device->meta_state.device, &desc_info, NULL, &key, sizeof(key),
+                                      layout_out);
+}
+
+struct radv_htile_expand_cs_key { /* Key used to look up the cached HTILE expand compute pipeline. */
+   enum radv_meta_object_key_type type; /* get_pipeline_cs() sets this to RADV_META_OBJECT_KEY_HTILE_EXPAND_CS. */
+   uint8_t samples; /* Sample count of the image; also passed to the shader builder. */
+};
+
+static VkResult
+get_pipeline_cs(struct radv_device *device, const struct radv_image *image, VkPipeline *pipeline_out,
+                VkPipelineLayout *layout_out)
+{
+   const uint32_t samples = image->vk.samples;
+   struct radv_htile_expand_cs_key key;
+   VkResult result;
+
+   result = get_pipeline_layout(device, layout_out);
   if (result != VK_SUCCESS)
      return result;

+   memset(&key, 0, sizeof(key));
+   key.type = RADV_META_OBJECT_KEY_HTILE_EXPAND_CS;
+   key.samples = samples;
+
   VkPipeline pipeline_from_cache = vk_meta_lookup_pipeline(&device->meta_state.device, &key, sizeof(key));
   if (pipeline_from_cache != VK_NULL_HANDLE) {
      *pipeline_out = pipeline_from_cache;
      return VK_SUCCESS;
   }

-   nir_shader *cs = radv_meta_nir_build_expand_depth_stencil_compute_shader();
+   nir_shader *cs = radv_meta_nir_build_expand_depth_stencil_compute_shader(samples);

   const VkPipelineShaderStageCreateInfo stage_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
@@ -337,7 +356,7 @@ radv_expand_depth_stencil_compute(struct radv_cmd_buffer *cmd_buffer, struct rad

   assert(radv_tc_compat_htile_enabled(image, subresourceRange->baseMipLevel));

-   result = get_pipeline_cs(device, &pipeline, &layout);
+   result = get_pipeline_cs(device, image, &pipeline, &layout);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
--- a/src/amd/vulkan/nir/radv_meta_nir.c
+++ b/src/amd/vulkan/nir/radv_meta_nir.c
@@ -1008,9 +1008,10 @@ radv_meta_nir_build_dcc_retile_compute_shader(enum amd_gfx_level gfx_level, uint
 }

 nir_shader *
-radv_meta_nir_build_expand_depth_stencil_compute_shader()
+radv_meta_nir_build_expand_depth_stencil_compute_shader(uint8_t samples)
 {
-   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);
+   const enum glsl_sampler_dim dim = samples > 1 ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
+   const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);

   nir_builder b = radv_meta_nir_init_shader(MESA_SHADER_COMPUTE, "expand_depth_stencil_compute");

@@ -1032,9 +1033,11 @@ radv_meta_nir_build_expand_depth_stencil_compute_shader()

   nir_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

-   nir_def *data = nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->def, global_id,
-                                        nir_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D,
-                                        .dest_type = nir_type_uint32);
+   nir_def *data[8];
+   for (uint32_t i = 0; i < samples; i++) {
+      data[i] = nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->def, global_id, nir_imm_int(&b, i),
+                                     nir_imm_int(&b, 0), .image_dim = dim, .dest_type = nir_type_uint32);
+   }

   /* We need a SCOPE_DEVICE memory_scope because ACO will avoid
    * creating a vmcnt(0) because it expects the L1 cache to keep memory
@@ -1043,8 +1046,11 @@ radv_meta_nir_build_expand_depth_stencil_compute_shader()
   nir_barrier(&b, .execution_scope = SCOPE_WORKGROUP, .memory_scope = SCOPE_DEVICE,
               .memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo);

-   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, global_id, nir_undef(&b, 1, 32), data,
-                         nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
+   for (uint32_t i = 0; i < samples; i++) {
+      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, global_id, nir_imm_int(&b, i), data[i],
+                            nir_imm_int(&b, 0), .image_dim = dim);
+   }
+
   return b.shader;
 }

--- a/src/amd/vulkan/nir/radv_meta_nir.h
+++ b/src/amd/vulkan/nir/radv_meta_nir.h
@@ -74,7 +74,7 @@ nir_shader *radv_meta_nir_build_copy_vrs_htile_shader(enum amd_gfx_level gfx_lev
 nir_shader *radv_meta_nir_build_dcc_retile_compute_shader(enum amd_gfx_level gfx_level, uint32_t gb_addr_config,
                                                          const struct radeon_surf *surf);

-nir_shader *radv_meta_nir_build_expand_depth_stencil_compute_shader(void);
+nir_shader *radv_meta_nir_build_expand_depth_stencil_compute_shader(uint8_t samples);

 nir_shader *radv_meta_nir_build_dcc_decompress_compute_shader(void);