panvk: enable 8x and 16x msaa when supported

This uses the helpers from the previous patch to calculate how many attachments and MRT buffers we have space for. In cases where smaller formats allow more MSAA samples, we also add support for that.

The flaky tests seem to be due to a CTS issue; see this ticket for details: https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/5651

Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33925>
2026-05-08 02:38:04 +02:00 · 2025-03-06 16:03:44 +01:00 · 2025-03-06 16:03:44 +01:00 · 1729409141
commit 1729409141
parent 329568b5eb
3 changed files with 49 additions and 6 deletions
--- a/docs/relnotes/new_features.txt
+++ b/docs/relnotes/new_features.txt
@ -34,3 +34,4 @@ VK_KHR_float_controls on panvk
 VK_KHR_float_controls2 on panvk
 storageBuffer8BitAccess on panvk
 uniformAndStorageBuffer8BitAccess on panvk
+MSAA with 8 and 16 sample counts on panvk
--- a/src/panfrost/ci/panfrost-g610-flakes.txt
+++ b/src/panfrost/ci/panfrost-g610-flakes.txt
@ -262,3 +262,7 @@ dEQP-VK.wsi.*.swapchain.simulate_oom.*

 # With linux kernel 6.13-rc4 uprev
 dEQP-GLES31.functional.copy_image.compressed.viewclass_astc_8x6_rgba.rgba_astc_8x6_khr_srgb8_alpha8_astc_8x6_khr.texture3d_to_cubemap
+
+# CTS issue: https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/5651
+dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_32_32.samples_16.d32_sfloat_s8_uint_separate_layouts.depth_zero_stencil_min_testing_stencil_samplemask
+dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_32_32.samples_16.d32_sfloat_s8_uint.depth_zero_stencil_min_testing_stencil_samplemask
--- a/src/panfrost/vulkan/panvk_physical_device.c
+++ b/src/panfrost/vulkan/panvk_physical_device.c
@ -567,20 +567,43 @@ get_conformance_version(unsigned arch)
   return (VkConformanceVersion){0, 0, 0, 0};
 }

+/* Build the mask of sample counts to advertise for a given format size.
+ *
+ * 1x and 4x are always reported. 8x and 16x are added when the limit
+ * returned by pan_get_max_msaa() for this arch, tile-buffer size, colour
+ * attachment count and format size allows them.
+ *
+ * format_size appears to be a per-pixel size in bytes (callers pass 16 for
+ * R32G32B32A32, or the format block size) -- confirm against
+ * pan_get_max_msaa().
+ */
+static VkSampleCountFlags
+get_sample_counts(unsigned arch, unsigned max_tib_size,
+                  unsigned max_cbuf_atts, unsigned format_size)
+{
+   const unsigned max_msaa =
+      pan_get_max_msaa(arch, max_tib_size, max_cbuf_atts, format_size);

+   /* 4x is advertised unconditionally below, so the limit must allow it. */
+   assert(max_msaa >= 4);

+   VkSampleCountFlags sample_counts =
+      VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;

+   sample_counts |= (max_msaa >= 8) ? VK_SAMPLE_COUNT_8_BIT : 0;
+   sample_counts |= (max_msaa >= 16) ? VK_SAMPLE_COUNT_16_BIT : 0;

+   return sample_counts;
+}
+
 static void
 get_device_properties(const struct panvk_instance *instance,
                      const struct panvk_physical_device *device,
                      struct vk_properties *properties)
 {
-   /* HW supports MSAA 4, 8 and 16, but we limit ourselves to MSAA 4 for now. */
+   unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);
+   unsigned max_tib_size = pan_get_max_tib_size(arch, device->model);
+   const unsigned max_cbuf_format = 16; /* R32G32B32A32 */
+
+   unsigned max_cbuf_atts = pan_get_max_cbufs(arch, max_tib_size);
   VkSampleCountFlags sample_counts =
-      VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;
+      get_sample_counts(arch, max_tib_size, max_cbuf_atts, max_cbuf_format);

   uint64_t os_page_size = 4096;
   os_get_page_size(&os_page_size);

-   unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);
-
   /* Ensure that the max threads count per workgroup is valid for Bifrost */
   assert(arch > 8 || device->kmod.props.max_threads_per_wg <= 1024);

@ -784,7 +807,7 @@ get_device_properties(const struct panvk_instance *instance,
      .framebufferDepthSampleCounts = sample_counts,
      .framebufferStencilSampleCounts = sample_counts,
      .framebufferNoAttachmentsSampleCounts = sample_counts,
-      .maxColorAttachments = 8,
+      .maxColorAttachments = max_cbuf_atts,
      .sampledImageColorSampleCounts = sample_counts,
      .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
      .sampledImageDepthSampleCounts = sample_counts,
@ -1431,6 +1454,20 @@ get_image_format_features(struct panvk_physical_device *physical_device,
   return features;
 }

+/* Return the sample counts supported for images of the given format.
+ *
+ * The architecture, maximum tile-buffer size and maximum colour attachment
+ * count are derived from the physical device, and the format block size is
+ * used as the format size passed to get_sample_counts().
+ *
+ * The result is a sample-count mask (it is OR'ed into sampleCounts by the
+ * caller), hence the VkSampleCountFlags return type.
+ */
+static VkSampleCountFlags
+get_image_format_sample_counts(struct panvk_physical_device *physical_device,
+                               VkFormat format)
+{
+   unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id);
+   unsigned max_tib_size = pan_get_max_tib_size(arch, physical_device->model);
+   unsigned max_cbuf_atts = pan_get_max_cbufs(arch, max_tib_size);

+   /* The block size is used as a per-pixel size below, which presumably
+    * only holds for uncompressed formats.
+    */
+   assert(!vk_format_is_compressed(format));
+   unsigned format_size = vk_format_get_blocksize(format);

+   return get_sample_counts(arch, max_tib_size, max_cbuf_atts, format_size);
+}
+
 static VkFormatFeatureFlags
 get_buffer_format_features(struct panvk_physical_device *physical_device,
                           VkFormat format)
@ -1604,7 +1641,8 @@ get_image_format_properties(struct panvk_physical_device *physical_device,
         VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
       !(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) &&
       !(all_usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
-      sampleCounts |= VK_SAMPLE_COUNT_4_BIT;
+      sampleCounts |=
+         get_image_format_sample_counts(physical_device, info->format);
   }

   /* From the Vulkan 1.2.199 spec: