From 1729409141424a8a04887953f3acf648120f75f7 Mon Sep 17 00:00:00 2001
From: Erik Faye-Lund <erik.faye-lund@collabora.com>
Date: Thu, 6 Mar 2025 16:03:44 +0100
Subject: [PATCH] panvk: enable 8x and 16x msaa when supported

This uses the helpers from the previous patch to calculate how many
attachments and MRT buffers we have space for.

In the case where we can support more MSAA samples for smaller formats,
we also add support for that.

The flaking tests seem to be due to a CTS issue, see this ticket for
details:

https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/5651

Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33925>
---
 docs/relnotes/new_features.txt              |  1 +
 src/panfrost/ci/panfrost-g610-flakes.txt    |  4 ++
 src/panfrost/vulkan/panvk_physical_device.c | 50 ++++++++++++++++++---
 3 files changed, 49 insertions(+), 6 deletions(-)

diff --git a/docs/relnotes/new_features.txt b/docs/relnotes/new_features.txt
index d447904cf7d..45dc5a05f1e 100644
--- a/docs/relnotes/new_features.txt
+++ b/docs/relnotes/new_features.txt
@@ -34,3 +34,4 @@ VK_KHR_float_controls on panvk
 VK_KHR_float_controls2 on panvk
 storageBuffer8BitAccess on panvk
 uniformAndStorageBuffer8BitAccess on panvk
+MSAA with 8 and 16 sample counts on panvk
diff --git a/src/panfrost/ci/panfrost-g610-flakes.txt b/src/panfrost/ci/panfrost-g610-flakes.txt
index 91033c80bfc..47a668a20f6 100644
--- a/src/panfrost/ci/panfrost-g610-flakes.txt
+++ b/src/panfrost/ci/panfrost-g610-flakes.txt
@@ -262,3 +262,7 @@ dEQP-VK.wsi.*.swapchain.simulate_oom.*
 
 # With linux kernel 6.13-rc4 uprev
 dEQP-GLES31.functional.copy_image.compressed.viewclass_astc_8x6_rgba.rgba_astc_8x6_khr_srgb8_alpha8_astc_8x6_khr.texture3d_to_cubemap
+
+# CTS issue: https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/5651
+dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_32_32.samples_16.d32_sfloat_s8_uint_separate_layouts.depth_zero_stencil_min_testing_stencil_samplemask
+dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_32_32.samples_16.d32_sfloat_s8_uint.depth_zero_stencil_min_testing_stencil_samplemask
diff --git a/src/panfrost/vulkan/panvk_physical_device.c b/src/panfrost/vulkan/panvk_physical_device.c
index de1347a1b6b..338204cfb2b 100644
--- a/src/panfrost/vulkan/panvk_physical_device.c
+++ b/src/panfrost/vulkan/panvk_physical_device.c
@@ -567,20 +567,43 @@ get_conformance_version(unsigned arch)
    return (VkConformanceVersion){0, 0, 0, 0};
 }
 
+static VkSampleCountFlags
+get_sample_counts(unsigned arch, unsigned max_tib_size,
+                  unsigned max_cbuf_atts, unsigned format_size)
+{
+   VkSampleCountFlags sample_counts =
+      VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;
+   /* 1x and 4x are always advertised; query the limit for higher counts. */
+   unsigned max_msaa =
+      pan_get_max_msaa(arch, max_tib_size, max_cbuf_atts, format_size);
+   /* The unconditional 4x bit above relies on the limit being >= 4. */
+   assert(max_msaa >= 4);
+   /* Add the 8x bit when the reported limit allows it. */
+   if (max_msaa >= 8)
+      sample_counts |= VK_SAMPLE_COUNT_8_BIT;
+   /* Likewise for the 16x bit. */
+   if (max_msaa >= 16)
+      sample_counts |= VK_SAMPLE_COUNT_16_BIT;
+
+   return sample_counts;
+}
+
 static void
 get_device_properties(const struct panvk_instance *instance,
                       const struct panvk_physical_device *device,
                       struct vk_properties *properties)
 {
-   /* HW supports MSAA 4, 8 and 16, but we limit ourselves to MSAA 4 for now. */
+   unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);
+   unsigned max_tib_size = pan_get_max_tib_size(arch, device->model);
+   const unsigned max_cbuf_format = 16; /* R32G32B32A32 */
+
+   unsigned max_cbuf_atts = pan_get_max_cbufs(arch, max_tib_size);
    VkSampleCountFlags sample_counts =
-      VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;
+      get_sample_counts(arch, max_tib_size, max_cbuf_atts, max_cbuf_format);
 
    uint64_t os_page_size = 4096;
    os_get_page_size(&os_page_size);
 
-   unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);
-
    /* Ensure that the max threads count per workgroup is valid for Bifrost */
    assert(arch > 8 || device->kmod.props.max_threads_per_wg <= 1024);
 
@@ -784,7 +807,7 @@ get_device_properties(const struct panvk_instance *instance,
       .framebufferDepthSampleCounts = sample_counts,
       .framebufferStencilSampleCounts = sample_counts,
       .framebufferNoAttachmentsSampleCounts = sample_counts,
-      .maxColorAttachments = 8,
+      .maxColorAttachments = max_cbuf_atts,
       .sampledImageColorSampleCounts = sample_counts,
       .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
       .sampledImageDepthSampleCounts = sample_counts,
@@ -1431,6 +1454,20 @@ get_image_format_features(struct panvk_physical_device *physical_device,
    return features;
 }
 
+static VkSampleCountFlags
+get_image_format_sample_counts(struct panvk_physical_device *physical_device,
+                               VkFormat format)
+{
+   unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id);
+   unsigned max_tib_size = pan_get_max_tib_size(arch, physical_device->model);
+   unsigned max_cbuf_atts = pan_get_max_cbufs(arch, max_tib_size);
+   /* The sample-count limit is derived from the format's block size. */
+   assert(!vk_format_is_compressed(format));
+   unsigned format_size = vk_format_get_blocksize(format);
+   /* Result is a VkSampleCountFlags mask (1x and 4x, plus 8x/16x if allowed). */
+   return get_sample_counts(arch, max_tib_size, max_cbuf_atts, format_size);
+}
+
 static VkFormatFeatureFlags
 get_buffer_format_features(struct panvk_physical_device *physical_device,
                            VkFormat format)
@@ -1604,7 +1641,8 @@ get_image_format_properties(struct panvk_physical_device *physical_device,
          VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
        !(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) &&
        !(all_usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
-      sampleCounts |= VK_SAMPLE_COUNT_4_BIT;
+      sampleCounts |=
+         get_image_format_sample_counts(physical_device, info->format);
    }
 
    /* From the Vulkan 1.2.199 spec: