radv/meta: fix expanding HTILE on compute with multisampling

The expand was considering only the first sample; this is a very old bug.

This fixes test_{copy,compute}_queue_depth_stencil_msaa from
vkd3d-proton on GFX11-GFX11.7 GPUs. Older GPUs don't support image
stores with depth/stencil MSAA images.

Cc: mesa-stable
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41267>
This commit is contained in:
Samuel Pitoiset 2026-04-29 17:38:47 +02:00 committed by Marge Bot
parent 207aa9eba4
commit 608bc0e593
4 changed files with 39 additions and 16 deletions

View file

@ -1,2 +0,0 @@
test_compute_queue_depth_stencil_msaa,Fail
test_copy_queue_depth_stencil_msaa,Fail

View file

@ -261,10 +261,9 @@ radv_process_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image
}
static VkResult
get_pipeline_cs(struct radv_device *device, VkPipeline *pipeline_out, VkPipelineLayout *layout_out)
get_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout_out)
{
enum radv_meta_object_key_type key = RADV_META_OBJECT_KEY_HTILE_EXPAND_CS;
VkResult result;
const VkDescriptorSetLayoutBinding bindings[] = {
{
@ -289,18 +288,38 @@ get_pipeline_cs(struct radv_device *device, VkPipeline *pipeline_out, VkPipeline
.pBindings = bindings,
};
result = vk_meta_get_pipeline_layout(&device->vk, &device->meta_state.device, &desc_info, NULL, &key, sizeof(key),
layout_out);
return vk_meta_get_pipeline_layout(&device->vk, &device->meta_state.device, &desc_info, NULL, &key, sizeof(key),
layout_out);
}
/* Lookup key for the HTILE expand compute pipeline.
 *
 * get_pipeline_cs() clears the whole struct with memset() before filling it
 * in and hands it, together with sizeof(key), to vk_meta_lookup_pipeline();
 * presumably the memset is there so that padding bytes are deterministic.
 */
struct radv_htile_expand_cs_key {
/* Meta object type; get_pipeline_cs() sets it to RADV_META_OBJECT_KEY_HTILE_EXPAND_CS. */
enum radv_meta_object_key_type type;
/* Sample count taken from image->vk.samples; the same value is passed to the shader builder. */
uint8_t samples;
};
static VkResult
get_pipeline_cs(struct radv_device *device, const struct radv_image *image, VkPipeline *pipeline_out,
VkPipelineLayout *layout_out)
{
const uint32_t samples = image->vk.samples;
struct radv_htile_expand_cs_key key;
VkResult result;
result = get_pipeline_layout(device, layout_out);
if (result != VK_SUCCESS)
return result;
memset(&key, 0, sizeof(key));
key.type = RADV_META_OBJECT_KEY_HTILE_EXPAND_CS;
key.samples = samples;
VkPipeline pipeline_from_cache = vk_meta_lookup_pipeline(&device->meta_state.device, &key, sizeof(key));
if (pipeline_from_cache != VK_NULL_HANDLE) {
*pipeline_out = pipeline_from_cache;
return VK_SUCCESS;
}
nir_shader *cs = radv_meta_nir_build_expand_depth_stencil_compute_shader();
nir_shader *cs = radv_meta_nir_build_expand_depth_stencil_compute_shader(samples);
const VkPipelineShaderStageCreateInfo stage_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
@ -337,7 +356,7 @@ radv_expand_depth_stencil_compute(struct radv_cmd_buffer *cmd_buffer, struct rad
assert(radv_tc_compat_htile_enabled(image, subresourceRange->baseMipLevel));
result = get_pipeline_cs(device, &pipeline, &layout);
result = get_pipeline_cs(device, image, &pipeline, &layout);
if (result != VK_SUCCESS) {
vk_command_buffer_set_error(&cmd_buffer->vk, result);
return;

View file

@ -1008,9 +1008,10 @@ radv_meta_nir_build_dcc_retile_compute_shader(enum amd_gfx_level gfx_level, uint
}
nir_shader *
radv_meta_nir_build_expand_depth_stencil_compute_shader()
radv_meta_nir_build_expand_depth_stencil_compute_shader(uint8_t samples)
{
const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);
const enum glsl_sampler_dim dim = samples > 1 ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
nir_builder b = radv_meta_nir_init_shader(MESA_SHADER_COMPUTE, "expand_depth_stencil_compute");
@ -1032,9 +1033,11 @@ radv_meta_nir_build_expand_depth_stencil_compute_shader()
nir_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
nir_def *data = nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->def, global_id,
nir_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D,
.dest_type = nir_type_uint32);
nir_def *data[8];
for (uint32_t i = 0; i < samples; i++) {
data[i] = nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->def, global_id, nir_imm_int(&b, i),
nir_imm_int(&b, 0), .image_dim = dim, .dest_type = nir_type_uint32);
}
/* We need a SCOPE_DEVICE memory_scope because ACO will avoid
* creating a vmcnt(0) because it expects the L1 cache to keep memory
@ -1043,8 +1046,11 @@ radv_meta_nir_build_expand_depth_stencil_compute_shader()
nir_barrier(&b, .execution_scope = SCOPE_WORKGROUP, .memory_scope = SCOPE_DEVICE,
.memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo);
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, global_id, nir_undef(&b, 1, 32), data,
nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
for (uint32_t i = 0; i < samples; i++) {
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, global_id, nir_imm_int(&b, i), data[i],
nir_imm_int(&b, 0), .image_dim = dim);
}
return b.shader;
}

View file

@ -74,7 +74,7 @@ nir_shader *radv_meta_nir_build_copy_vrs_htile_shader(enum amd_gfx_level gfx_lev
nir_shader *radv_meta_nir_build_dcc_retile_compute_shader(enum amd_gfx_level gfx_level, uint32_t gb_addr_config,
const struct radeon_surf *surf);
nir_shader *radv_meta_nir_build_expand_depth_stencil_compute_shader(void);
nir_shader *radv_meta_nir_build_expand_depth_stencil_compute_shader(uint8_t samples);
nir_shader *radv_meta_nir_build_dcc_decompress_compute_shader(void);