From a2e61ee1b9ce305cd3cfadd932be89a47765e02d Mon Sep 17 00:00:00 2001 From: "Eric R. Smith" Date: Tue, 24 Mar 2026 09:53:08 -0300 Subject: [PATCH] pan: change image2DMSArray lowering to use Z instead of Y We used to lower multisampled arrays to 3D images by adjusting the height and the Y coordinate so that addressing samples became addressing into the new base image. This worked for gallium, but was never implemented for vulkan, and also had the disadvantages that (a) we handled arrays and non-arrays differently, and (b) the image height was restricted to 4096. Change this so that we lower samples into the Z coordinate instead, adding new layers for each sample. This requires that we know the number of samples (so we have to save a sysval for this in gallium) but means that we handle arrays and non-arrays the same. More importantly, we can fit 3 bits to indicate the number of samples into the attribute descriptor in Vulkan, so this scheme works there as well as in OpenGL. Reviewed-by: Lars-Ivar Hesselberg Simonsen Reviewed-by: Erik Faye-Lund Part-of: --- src/gallium/drivers/panfrost/pan_cmdstream.c | 15 ++---- src/panfrost/ci/panfrost-g52-fails.txt | 10 ---- .../compiler/pan_nir_lower_image_ms.c | 54 +++++++------------ src/panfrost/vulkan/panvk_vX_image_view.c | 6 ++- .../vulkan/panvk_vX_nir_lower_descriptors.c | 36 ++++++++++--- 5 files changed, 56 insertions(+), 65 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 7aff070267a..74806aba446 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -2187,18 +2187,9 @@ emit_image_bufs(struct panfrost_batch *batch, mesa_shader_stage shader, cfg.slice_stride = slice_stride; if (is_msaa) { - if (cfg.r_dimension == 1) { - /* regular multisampled images get the sample index in - the R dimension */ - cfg.r_dimension = samples; - cfg.slice_stride = slice_stride / samples; - } else { - /* multisampled image arrays are emulated by making the - image "samples" times higher than the original image, - and fixing up the T coordinate by the sample number - to address the correct sample (on bifrost) */ - cfg.t_dimension *= samples; - } + /* scale up the R dimension by the number of samples */ + cfg.r_dimension *= samples; + cfg.slice_stride = slice_stride / samples; } } } diff --git a/src/panfrost/ci/panfrost-g52-fails.txt b/src/panfrost/ci/panfrost-g52-fails.txt index 196f6180d3d..fe03bc5bb16 100644 --- a/src/panfrost/ci/panfrost-g52-fails.txt +++ b/src/panfrost/ci/panfrost-g52-fails.txt @@ -307,16 +307,6 @@ dEQP-VK.api.device_init.create_device_global_priority_khr.basic,Fail dEQP-VK.api.device_init.create_device_global_priority_query.basic,Fail dEQP-VK.api.device_init.create_device_global_priority_query_khr.basic,Fail -# New with vk_clear -dEQP-VK.api.copy_and_blit.dedicated_allocation.resolve_image.whole_copy_before_resolving_no_cab.4_bit,Fail -dEQP-VK.api.copy_and_blit.dedicated_allocation.resolve_image.copy_with_regions_before_resolving.4_bit,Fail -dEQP-VK.api.copy_and_blit.core.resolve_image.whole_copy_before_resolving_no_cab.4_bit,Fail -dEQP-VK.api.copy_and_blit.core.resolve_image.whole_copy_before_resolving_no_cab.4_bit_bind_offset,Fail -dEQP-VK.api.copy_and_blit.core.resolve_image.copy_with_regions_before_resolving.4_bit,Fail -dEQP-VK.api.copy_and_blit.core.resolve_image.copy_with_regions_before_resolving.4_bit_bind_offset,Fail -dEQP-VK.api.copy_and_blit.copy_commands2.resolve_image.whole_copy_before_resolving_no_cab.4_bit,Fail -dEQP-VK.api.copy_and_blit.copy_commands2.resolve_image.copy_with_regions_before_resolving.4_bit,Fail - # New failures with VKCTS 1.4.4.0 dEQP-VK.binding_model.unused_invalid_descriptor.write.invalid.combined_image_sampler,Crash dEQP-VK.binding_model.unused_invalid_descriptor.write.invalid.sampled_image,Crash diff --git a/src/panfrost/compiler/pan_nir_lower_image_ms.c b/src/panfrost/compiler/pan_nir_lower_image_ms.c index 100c631eb36..a999e3d17e8 100644 --- a/src/panfrost/compiler/pan_nir_lower_image_ms.c +++ b/src/panfrost/compiler/pan_nir_lower_image_ms.c @@ -33,45 +33,27 @@ nir_lower_image_ms(nir_builder *b, nir_intrinsic_instr *intr, nir_def *coord = intr->src[1].ssa; nir_def *sample = nir_channel(b, intr->src[2].ssa, 0); + bool is_array = nir_intrinsic_image_array(intr); - if (nir_intrinsic_image_array(intr)) { - /* Unlike textures, images only embed a single LOD, hence the zero. */ - nir_def *lod = nir_imm_int(b, 0); - nir_def *img_size = - img_deref ? nir_image_deref_size(b, 3, 32, intr->src[0].ssa, lod) : - nir_image_size(b, 3, 32, intr->src[0].ssa, lod, - .image_array = true, .image_dim = GLSL_SAMPLER_DIM_MS); - nir_def *img_height = nir_channel(b, img_size, 1); - nir_def *y_coord = nir_channel(b, coord, 1); - nir_def *z_coord = nir_channel(b, coord, 2); + nir_def *img_samples = + img_deref ? + nir_image_deref_samples(b, 32, intr->src[0].ssa, .image_array = is_array, + .image_dim = GLSL_SAMPLER_DIM_MS) : + nir_image_samples(b, 32, intr->src[0].ssa, .image_array = is_array, + .image_dim = GLSL_SAMPLER_DIM_MS); - /* With image2DMSArray, the Z coord is already used to index the array. We - * assume sample planes are adjacent and patch the Y coordinate to address - * the right sample plane. This means our image height is effectively - * limited to 4k though. - * - * Note that we don't trust image intrinsic is_array information because - * arrays of size one are allowed, and we only get to know the actual - * image size at bind time. - */ - nir_def *is_array = nir_ugt_imm(b, nir_channel(b, img_size, 2), 1); + nir_def *z_coord = nir_channel(b, coord, 2); - y_coord = nir_bcsel( - b, is_array, - nir_iadd(b, nir_imul(b, img_height, sample), y_coord), y_coord); - z_coord = nir_bcsel(b, is_array, z_coord, sample); - coord = nir_vec4(b, nir_channel(b, coord, 0), y_coord, z_coord, - nir_channel(b, coord, 3)); - - nir_src_rewrite(&intr->src[1], coord); - } else { - /* image2DMS is treated by panfrost as if it were a 3D image, so - * the sample index is in src[2]. We need to put this into the coordinates - * in the Z component. - */ - nir_src_rewrite(&intr->src[1], - nir_vector_insert_imm(b, coord, sample, 2)); - } + /* image2DMS is treated by panfrost as if it were a 3D image, so + * the sample index is in src[2]. We need to put this into the coordinates + * in the Z component. If there already was a Z component (i.e. an + * array index) then scale that by the number of samples and add it to + * the sample number. We've lowered image2DMSArray images to be 3D images + * with a larger Z. + */ + z_coord = nir_iadd(b, sample, nir_imul(b, z_coord, img_samples)); + nir_src_rewrite(&intr->src[1], + nir_vector_insert_imm(b, coord, z_coord, 2)); nir_intrinsic_set_image_dim(intr, GLSL_SAMPLER_DIM_3D); nir_intrinsic_set_image_array(intr, false); diff --git a/src/panfrost/vulkan/panvk_vX_image_view.c b/src/panfrost/vulkan/panvk_vX_image_view.c index d4f46c47dfb..78526ca61d3 100644 --- a/src/panfrost/vulkan/panvk_vX_image_view.c +++ b/src/panfrost/vulkan/panvk_vX_image_view.c @@ -261,11 +261,15 @@ prepare_attr_buf_descs(struct panvk_image_view *view) ? extent.depth : (view->pview.last_layer - view->pview.first_layer + 1); cfg.row_stride = slayout->tiled_or_linear.row_stride_B; - if (cfg.r_dimension > 1) { + if (cfg.r_dimension > 1 || nr_samples > 1) { cfg.slice_stride = view->pview.dim == MALI_TEXTURE_DIMENSION_3D ? slayout->tiled_or_linear.surface_stride_B : plane_layout->array_stride_B; } + if (nr_samples > 1) { + cfg.r_dimension *= nr_samples; + cfg.slice_stride /= nr_samples; + } } } #endif diff --git a/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c b/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c index 6295db84361..1ba2b381d80 100644 --- a/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c +++ b/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c @@ -651,7 +651,26 @@ load_img_size(nir_builder *b, nir_deref_instr *deref, enum glsl_sampler_dim dim, /* The sizes are provided as 16-bit values with 1 subtracted so * convert to 32-bit and add 1. */ - return nir_iadd_imm(b, nir_u2u32(b, tex_sz), 1); + tex_sz = nir_iadd_imm(b, nir_u2u32(b, tex_sz), 1); + + if (is_array && dim == GLSL_SAMPLER_DIM_MS) { + /* log2(sample_count) is placed into bits 7..9 of the pixel + * stride (see prepare_attr_buf_descs) which is stored at + * offset 8 in the attribute descriptor buffer + */ + nir_def *sample_count = load_resource_deref_desc( + b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 8, 1, 32, ctx); + sample_count = nir_iand_imm(b, nir_ushr_imm(b, sample_count, 7), + BITFIELD_MASK(3)); + /* the Z value was lowered by the number of samples so that + * we could make the multisampled array look like a 3D texture + * Undo that here so we get the correct size + */ + nir_def *z_val = nir_channel(b, tex_sz, 2); + z_val = nir_ushr(b, z_val, sample_count); + tex_sz = nir_vector_insert_imm(b, tex_sz, z_val, 2); + } + return tex_sz; } } @@ -706,12 +725,17 @@ load_img_samples(nir_builder *b, nir_deref_instr *deref, assert(dim != GLSL_SAMPLER_DIM_BUF); - /* Sample count is stored in the image depth field. - * FIXME: This won't work for 2DMSArray images, but those are already - * broken. */ + /* log2(sample_count) is placed into bits 7..9 of the pixel + * stride (see prepare_attr_buf_descs) which is stored at + * offset 8 in the attribute descriptor buffer + */ + nir_def *one = nir_imm_int(b, 1); nir_def *sample_count = load_resource_deref_desc( - b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 22, 1, 16, ctx); - return nir_iadd_imm(b, nir_u2u32(b, sample_count), 1); + b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 8, 1, 32, ctx); + + sample_count = nir_iand_imm(b, nir_ushr_imm(b, sample_count, 7), + BITFIELD_MASK(3)); + return nir_ishl(b, one, sample_count); } static uint32_t