From 1729409141424a8a04887953f3acf648120f75f7 Mon Sep 17 00:00:00 2001 From: Erik Faye-Lund Date: Thu, 6 Mar 2025 16:03:44 +0100 Subject: [PATCH] panvk: enable 8x and 16x msaa when supported This uses the helpers from the previous patch to calculate how many attachments and MRT buffers we have space for. In the case where we can support more MSAA samples for smaller formats, we also add support for that. The flaking test seems to be due to a CTS issue, see this ticket for details: https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/5651 Reviewed-by: Lars-Ivar Hesselberg Simonsen Part-of: --- docs/relnotes/new_features.txt | 1 + src/panfrost/ci/panfrost-g610-flakes.txt | 4 ++ src/panfrost/vulkan/panvk_physical_device.c | 50 ++++++++++++++++++--- 3 files changed, 49 insertions(+), 6 deletions(-) diff --git a/docs/relnotes/new_features.txt b/docs/relnotes/new_features.txt index d447904cf7d..45dc5a05f1e 100644 --- a/docs/relnotes/new_features.txt +++ b/docs/relnotes/new_features.txt @@ -34,3 +34,4 @@ VK_KHR_float_controls on panvk VK_KHR_float_controls2 on panvk storageBuffer8BitAccess on panvk uniformAndStorageBuffer8BitAccess on panvk +MSAA with 8 and 16 sample counts on panvk diff --git a/src/panfrost/ci/panfrost-g610-flakes.txt b/src/panfrost/ci/panfrost-g610-flakes.txt index 91033c80bfc..47a668a20f6 100644 --- a/src/panfrost/ci/panfrost-g610-flakes.txt +++ b/src/panfrost/ci/panfrost-g610-flakes.txt @@ -262,3 +262,7 @@ dEQP-VK.wsi.*.swapchain.simulate_oom.* # With linux kernel 6.13-rc4 uprev dEQP-GLES31.functional.copy_image.compressed.viewclass_astc_8x6_rgba.rgba_astc_8x6_khr_srgb8_alpha8_astc_8x6_khr.texture3d_to_cubemap + +# CTS issue: https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/5651 +dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_32_32.samples_16.d32_sfloat_s8_uint_separate_layouts.depth_zero_stencil_min_testing_stencil_samplemask +dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_32_32.samples_16.d32_sfloat_s8_uint.depth_zero_stencil_min_testing_stencil_samplemask diff --git a/src/panfrost/vulkan/panvk_physical_device.c b/src/panfrost/vulkan/panvk_physical_device.c index de1347a1b6b..338204cfb2b 100644 --- a/src/panfrost/vulkan/panvk_physical_device.c +++ b/src/panfrost/vulkan/panvk_physical_device.c @@ -567,20 +567,43 @@ get_conformance_version(unsigned arch) return (VkConformanceVersion){0, 0, 0, 0}; } +static VkSampleCountFlags +get_sample_counts(unsigned arch, unsigned max_tib_size, + unsigned max_cbuf_atts, unsigned format_size) +{ + VkSampleCountFlags sample_counts = + VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT; + + unsigned max_msaa = + pan_get_max_msaa(arch, max_tib_size, max_cbuf_atts, format_size); + + assert(max_msaa >= 4); + + if (max_msaa >= 8) + sample_counts |= VK_SAMPLE_COUNT_8_BIT; + + if (max_msaa >= 16) + sample_counts |= VK_SAMPLE_COUNT_16_BIT; + + return sample_counts; +} + static void get_device_properties(const struct panvk_instance *instance, const struct panvk_physical_device *device, struct vk_properties *properties) { - /* HW supports MSAA 4, 8 and 16, but we limit ourselves to MSAA 4 for now. */ + unsigned arch = pan_arch(device->kmod.props.gpu_prod_id); + unsigned max_tib_size = pan_get_max_tib_size(arch, device->model); + const unsigned max_cbuf_format = 16; /* R32G32B32A32 */ + + unsigned max_cbuf_atts = pan_get_max_cbufs(arch, max_tib_size); VkSampleCountFlags sample_counts = - VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT; + get_sample_counts(arch, max_tib_size, max_cbuf_atts, max_cbuf_format); uint64_t os_page_size = 4096; os_get_page_size(&os_page_size); - unsigned arch = pan_arch(device->kmod.props.gpu_prod_id); - /* Ensure that the max threads count per workgroup is valid for Bifrost */ assert(arch > 8 || device->kmod.props.max_threads_per_wg <= 1024); @@ -784,7 +807,7 @@ get_device_properties(const struct panvk_instance *instance, .framebufferDepthSampleCounts = sample_counts, .framebufferStencilSampleCounts = sample_counts, .framebufferNoAttachmentsSampleCounts = sample_counts, - .maxColorAttachments = 8, + .maxColorAttachments = max_cbuf_atts, .sampledImageColorSampleCounts = sample_counts, .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT, .sampledImageDepthSampleCounts = sample_counts, @@ -1431,6 +1454,20 @@ get_image_format_features(struct panvk_physical_device *physical_device, return features; } +static VkFormatFeatureFlags +get_image_format_sample_counts(struct panvk_physical_device *physical_device, + VkFormat format) +{ + unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id); + unsigned max_tib_size = pan_get_max_tib_size(arch, physical_device->model); + unsigned max_cbuf_atts = pan_get_max_cbufs(arch, max_tib_size); + + assert(!vk_format_is_compressed(format)); + unsigned format_size = vk_format_get_blocksize(format); + + return get_sample_counts(arch, max_tib_size, max_cbuf_atts, format_size); +} + static VkFormatFeatureFlags get_buffer_format_features(struct panvk_physical_device *physical_device, VkFormat format) @@ -1604,7 +1641,8 @@ get_image_format_properties(struct panvk_physical_device *physical_device, VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) && !(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) && !(all_usage & VK_IMAGE_USAGE_STORAGE_BIT)) { - sampleCounts |= VK_SAMPLE_COUNT_4_BIT; + sampleCounts |= + get_image_format_sample_counts(physical_device, info->format); } /* From the Vulkan 1.2.199 spec: