From a982fae530996e9c0b05b7325a4d72fc578d1dc2 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 8 Dec 2025 04:54:20 -0500 Subject: [PATCH 01/41] vulkan/render_pass: Allocate the max number of formats and samplers per-subpass We're not really saving ourselves anything by making these part of the dynamic allocation. We only support 8 color attachments anyway and these things are tiny. This does make everything a lot simpler and give us more flexibility going forwards. --- src/vulkan/runtime/vk_render_pass.c | 39 ++++++++--------------------- src/vulkan/runtime/vk_render_pass.h | 7 +++++- 2 files changed, 16 insertions(+), 30 deletions(-) diff --git a/src/vulkan/runtime/vk_render_pass.c b/src/vulkan/runtime/vk_render_pass.c index b3d51d9a512..c961b31db8c 100644 --- a/src/vulkan/runtime/vk_render_pass.c +++ b/src/vulkan/runtime/vk_render_pass.c @@ -446,19 +446,12 @@ vk_common_CreateRenderPass2(VkDevice _device, pCreateInfo->dependencyCount); uint32_t subpass_attachment_count = 0; - uint32_t subpass_color_attachment_count = 0; for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { subpass_attachment_count += num_subpass_attachments2(&pCreateInfo->pSubpasses[i]); - subpass_color_attachment_count += - pCreateInfo->pSubpasses[i].colorAttachmentCount; } VK_MULTIALLOC_DECL(&ma, struct vk_subpass_attachment, subpass_attachments, subpass_attachment_count); - VK_MULTIALLOC_DECL(&ma, VkFormat, subpass_color_formats, - subpass_color_attachment_count); - VK_MULTIALLOC_DECL(&ma, VkSampleCountFlagBits, subpass_color_samples, - subpass_color_attachment_count); if (!vk_object_multizalloc(device, &ma, pAllocator, VK_OBJECT_TYPE_RENDER_PASS)) @@ -477,8 +470,6 @@ vk_common_CreateRenderPass2(VkDevice _device, } struct vk_subpass_attachment *next_subpass_attachment = subpass_attachments; - VkFormat *next_subpass_color_format = subpass_color_formats; - VkSampleCountFlagBits *next_subpass_color_samples = subpass_color_samples; for (uint32_t s = 0; s < pCreateInfo->subpassCount; 
s++) { const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[s]; struct vk_subpass *subpass = &pass->subpasses[s]; @@ -663,29 +654,23 @@ vk_common_CreateRenderPass2(VkDevice _device, } } - VkFormat *color_formats = NULL; - VkSampleCountFlagBits *color_samples = NULL; VkSampleCountFlagBits samples = 0; if (desc->colorAttachmentCount > 0) { - color_formats = next_subpass_color_format; - color_samples = next_subpass_color_samples; for (uint32_t a = 0; a < desc->colorAttachmentCount; a++) { const VkAttachmentReference2 *ref = &desc->pColorAttachments[a]; if (ref->attachment >= pCreateInfo->attachmentCount) { - color_formats[a] = VK_FORMAT_UNDEFINED; - color_samples[a] = VK_SAMPLE_COUNT_1_BIT; + subpass->color_attachment_formats[a] = VK_FORMAT_UNDEFINED; + subpass->sample_count_amd.samples[a] = VK_SAMPLE_COUNT_1_BIT; } else { const VkAttachmentDescription2 *att = &pCreateInfo->pAttachments[ref->attachment]; - color_formats[a] = att->format; - color_samples[a] = att->samples; + subpass->color_attachment_formats[a] = att->format; + subpass->sample_count_amd.samples[a] = att->samples; samples |= att->samples; } } - next_subpass_color_format += desc->colorAttachmentCount; - next_subpass_color_samples += desc->colorAttachmentCount; } subpass->ial.depth = VK_ATTACHMENT_UNUSED; @@ -722,17 +707,17 @@ vk_common_CreateRenderPass2(VkDevice _device, } } - subpass->sample_count_info_amd = (VkAttachmentSampleCountInfoAMD) { + subpass->sample_count_amd.info = (VkAttachmentSampleCountInfoAMD) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_SAMPLE_COUNT_INFO_AMD, .pNext = NULL, .colorAttachmentCount = desc->colorAttachmentCount, - .pColorAttachmentSamples = color_samples, + .pColorAttachmentSamples = subpass->sample_count_amd.samples, .depthStencilAttachmentSamples = depth_stencil_samples, }; subpass->ial.info = (VkRenderingInputAttachmentIndexInfo) { .sType = VK_STRUCTURE_TYPE_RENDERING_INPUT_ATTACHMENT_INDEX_INFO, - .pNext = &subpass->sample_count_info_amd, + .pNext = 
&subpass->sample_count_amd.info, .colorAttachmentCount = subpass->color_count, .pColorAttachmentInputIndices = subpass->ial.colors, /* From the Vulkan 1.3.204 spec: @@ -773,7 +758,7 @@ vk_common_CreateRenderPass2(VkDevice _device, .pNext = &subpass->ial.info, .viewMask = desc->viewMask, .colorAttachmentCount = desc->colorAttachmentCount, - .pColorAttachmentFormats = color_formats, + .pColorAttachmentFormats = subpass->color_attachment_formats, .depthAttachmentFormat = depth_format, .stencilAttachmentFormat = stencil_format, }; @@ -785,7 +770,7 @@ vk_common_CreateRenderPass2(VkDevice _device, .flags = VK_RENDERING_CONTENTS_SECONDARY_COMMAND_BUFFERS_BIT, .viewMask = desc->viewMask, .colorAttachmentCount = desc->colorAttachmentCount, - .pColorAttachmentFormats = color_formats, + .pColorAttachmentFormats = subpass->color_attachment_formats, .depthAttachmentFormat = depth_format, .stencilAttachmentFormat = stencil_format, .rasterizationSamples = samples, @@ -802,10 +787,6 @@ vk_common_CreateRenderPass2(VkDevice _device, } assert(next_subpass_attachment == subpass_attachments + subpass_attachment_count); - assert(next_subpass_color_format == - subpass_color_formats + subpass_color_attachment_count); - assert(next_subpass_color_samples == - subpass_color_samples + subpass_color_attachment_count); /* Walk forwards over the subpasses to compute first_subpass masks for all * attachments. 
@@ -983,7 +964,7 @@ vk_get_pipeline_sample_count_info_amd(const VkGraphicsPipelineCreateInfo *info) VK_FROM_HANDLE(vk_render_pass, render_pass, info->renderPass); if (render_pass != NULL) { assert(info->subpass < render_pass->subpass_count); - return &render_pass->subpasses[info->subpass].sample_count_info_amd; + return &render_pass->subpasses[info->subpass].sample_count_amd.info; } return vk_find_struct_const(info->pNext, ATTACHMENT_SAMPLE_COUNT_INFO_AMD); diff --git a/src/vulkan/runtime/vk_render_pass.h b/src/vulkan/runtime/vk_render_pass.h index 302a9606ebc..7e39e8fb971 100644 --- a/src/vulkan/runtime/vk_render_pass.h +++ b/src/vulkan/runtime/vk_render_pass.h @@ -136,7 +136,10 @@ struct vk_subpass { * * This is in the pNext chain of pipeline_info and inheritance_info. */ - VkAttachmentSampleCountInfoAMD sample_count_info_amd; + struct { + VkAttachmentSampleCountInfoAMD info; + VkSampleCountFlagBits samples[MESA_VK_MAX_COLOR_ATTACHMENTS]; + } sample_count_amd; /** VkRenderingInputAttachmentIndexInfo for this subpass * @@ -152,6 +155,8 @@ struct vk_subpass { uint32_t stencil; } ial; + VkFormat color_attachment_formats[MESA_VK_MAX_COLOR_ATTACHMENTS]; + /** VkPipelineRenderingCreateInfo for this subpass * * Returned by vk_get_pipeline_rendering_create_info() if From 9cc7743af613330833b66e09c5ebddb942a59c84 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 8 Dec 2025 05:03:27 -0500 Subject: [PATCH 02/41] vulkan/render_pass: Simplify ial setup --- src/vulkan/runtime/vk_render_pass.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/src/vulkan/runtime/vk_render_pass.c b/src/vulkan/runtime/vk_render_pass.c index c961b31db8c..5e63fc4d3de 100644 --- a/src/vulkan/runtime/vk_render_pass.c +++ b/src/vulkan/runtime/vk_render_pass.c @@ -659,12 +659,20 @@ vk_common_CreateRenderPass2(VkDevice _device, for (uint32_t a = 0; a < desc->colorAttachmentCount; a++) { const VkAttachmentReference2 *ref = &desc->pColorAttachments[a]; if 
(ref->attachment >= pCreateInfo->attachmentCount) { + subpass->ial.colors[a] = VK_ATTACHMENT_UNUSED; subpass->color_attachment_formats[a] = VK_FORMAT_UNDEFINED; subpass->sample_count_amd.samples[a] = VK_SAMPLE_COUNT_1_BIT; } else { const VkAttachmentDescription2 *att = &pCreateInfo->pAttachments[ref->attachment]; + for (uint32_t j = 0; j < subpass->input_count; j++) { + if (subpass->input_attachments[j].attachment == + subpass->color_attachments[a].attachment) { + subpass->ial.colors[a] = j; + } + } + subpass->color_attachment_formats[a] = att->format; subpass->sample_count_amd.samples[a] = att->samples; @@ -738,21 +746,6 @@ vk_common_CreateRenderPass2(VkDevice _device, .pStencilInputAttachmentIndex = &subpass->ial.stencil, }; - /* Build the color -> input attachment map. */ - for (uint32_t i = 0; i < subpass->color_count; i++) { - subpass->ial.colors[i] = VK_ATTACHMENT_UNUSED; - - if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) - continue; - - for (uint32_t j = 0; j < subpass->input_count; j++) { - if (subpass->input_attachments[j].attachment == - subpass->color_attachments[i].attachment) { - subpass->ial.colors[i] = j; - } - } - } - subpass->pipeline_info = (VkPipelineRenderingCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO, .pNext = &subpass->ial.info, From 5f983bac773f993ffc5c591f977c59c779ec5190 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 8 Dec 2025 14:22:22 -0500 Subject: [PATCH 03/41] vulkan/render_pass: Gather external dependencies at create time --- src/vulkan/runtime/vk_render_pass.c | 19 ++++++++++++------- src/vulkan/runtime/vk_render_pass.h | 3 +++ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/vulkan/runtime/vk_render_pass.c b/src/vulkan/runtime/vk_render_pass.c index 5e63fc4d3de..31ea757eba2 100644 --- a/src/vulkan/runtime/vk_render_pass.c +++ b/src/vulkan/runtime/vk_render_pass.c @@ -878,6 +878,16 @@ vk_common_CreateRenderPass2(VkDevice _device, .view_offset = 
dep->viewOffset, }; + if (dep->srcSubpass == VK_SUBPASS_EXTERNAL) { + assert(dep->dstSubpass < pass->subpass_count); + pass->subpasses[dep->dstSubpass].has_external_src_dependency = true; + } + + if (dep->dstSubpass == VK_SUBPASS_EXTERNAL) { + assert(dep->srcSubpass < pass->subpass_count); + pass->subpasses[dep->srcSubpass].has_external_dst_dependency = true; + } + /* From the Vulkan 1.3.204 spec: * * "If a VkMemoryBarrier2 is included in the pNext chain, @@ -2187,7 +2197,6 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, * implicit one. */ STACK_ARRAY(VkMemoryBarrier2, mem_barriers, pass->dependency_count + 1); uint32_t mem_barrier_count = 0; - bool external_dependency = false; for (uint32_t d = 0; d < pass->dependency_count; d++) { const struct vk_subpass_dependency *dep = &pass->dependencies[d]; if (dep->dst_subpass != subpass_idx) @@ -2235,7 +2244,6 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, .dstStageMask = dep->dst_stage_mask, .dstAccessMask = dep->dst_access_mask, }; - external_dependency |= (dep->src_subpass == VK_SUBPASS_EXTERNAL); } uint32_t max_image_barrier_count = 0; @@ -2286,7 +2294,7 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, } assert(image_barrier_count <= max_image_barrier_count); - if (has_layout_transition && !external_dependency) { + if (has_layout_transition && !subpass->has_external_src_dependency) { /* From the Vulkan 1.3.232 spec: * * "If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the @@ -2510,10 +2518,7 @@ end_subpass(struct vk_command_buffer *cmd_buffer, }; } - /* If we have a barrier, we have an external dependency */ - bool external_dependency = mem_barrier_count > 0; - - if (!external_dependency) { + if (!subpass->has_external_dst_dependency) { bool has_layout_transition = false; for (uint32_t a = 0; a < subpass->attachment_count; a++) { const struct vk_subpass_attachment *sp_att = &subpass->attachments[a]; diff --git a/src/vulkan/runtime/vk_render_pass.h b/src/vulkan/runtime/vk_render_pass.h 
index 7e39e8fb971..937ed1c1434 100644 --- a/src/vulkan/runtime/vk_render_pass.h +++ b/src/vulkan/runtime/vk_render_pass.h @@ -120,6 +120,9 @@ struct vk_subpass { */ uint32_t view_mask; + bool has_external_src_dependency; + bool has_external_dst_dependency; + /** VkSubpassDescriptionDepthStencilResolve::depthResolveMode */ VkResolveModeFlagBits depth_resolve_mode; From 582b0d76d54295263b65339de21618c765612cf6 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Tue, 9 Dec 2025 13:53:26 -0500 Subject: [PATCH 04/41] vulkan/render_pass: Rework MRTSS info For things that get chained into VkRenderingInfo, we don't typically put them in the render pass itself and instead construct them on-the-fly because we need to chain them into other on-the-fly structs. We only put stuff that gets queried at pipeline creation in the subpass itself. There's no good reason why MRTSS is any different, especially given that it can be represented as a single uint32_t in the subpass. --- src/vulkan/runtime/vk_render_pass.c | 49 +++++++++++++++-------------- src/vulkan/runtime/vk_render_pass.h | 6 ++-- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/src/vulkan/runtime/vk_render_pass.c b/src/vulkan/runtime/vk_render_pass.c index 31ea757eba2..5189d59d36e 100644 --- a/src/vulkan/runtime/vk_render_pass.c +++ b/src/vulkan/runtime/vk_render_pass.c @@ -473,10 +473,6 @@ vk_common_CreateRenderPass2(VkDevice _device, for (uint32_t s = 0; s < pCreateInfo->subpassCount; s++) { const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[s]; struct vk_subpass *subpass = &pass->subpasses[s]; - const VkMultisampledRenderToSingleSampledInfoEXT *mrtss = - vk_find_struct_const(desc->pNext, MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT); - if (mrtss && !mrtss->multisampledRenderToSingleSampledEnable) - mrtss = NULL; subpass->attachment_count = num_subpass_attachments2(desc); subpass->attachments = next_subpass_attachment; @@ -503,6 +499,11 @@ vk_common_CreateRenderPass2(VkDevice _device, 
subpass->view_mask = desc->viewMask ? desc->viewMask : 1; pass->view_mask |= subpass->view_mask; + const VkMultisampledRenderToSingleSampledInfoEXT *mrtss_info = + vk_find_struct_const(desc->pNext, MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT); + if (mrtss_info && mrtss_info->multisampledRenderToSingleSampledEnable) + subpass->mrtss_samples = mrtss_info->rasterizationSamples; + subpass->input_count = desc->inputAttachmentCount; if (desc->inputAttachmentCount > 0) { subpass->input_attachments = next_subpass_attachment; @@ -577,7 +578,8 @@ vk_common_CreateRenderPass2(VkDevice _device, subpass->depth_stencil_resolve_attachment, pCreateInfo); } - if (subpass->depth_stencil_resolve_attachment || mrtss) { + if (subpass->depth_stencil_resolve_attachment || + subpass->mrtss_samples != 0) { /* From the Vulkan 1.3.204 spec: * * VUID-VkSubpassDescriptionDepthStencilResolve-pDepthStencilResolveAttachment-03178 @@ -768,15 +770,6 @@ vk_common_CreateRenderPass2(VkDevice _device, .stencilAttachmentFormat = stencil_format, .rasterizationSamples = samples, }; - - if (mrtss) { - assert(mrtss->multisampledRenderToSingleSampledEnable); - subpass->mrtss = (VkMultisampledRenderToSingleSampledInfoEXT) { - .sType = VK_STRUCTURE_TYPE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT, - .multisampledRenderToSingleSampledEnable = VK_TRUE, - .rasterizationSamples = mrtss->rasterizationSamples, - }; - } } assert(next_subpass_attachment == subpass_attachments + subpass_attachment_count); @@ -1166,11 +1159,14 @@ vk_get_command_buffer_inheritance_as_rendering_resume( __vk_append_struct(&data->rendering, &data->fsr_att); } - /* Append this one last because it lives in the subpass and we don't want - * to be changed by appending other structures later. 
- */ - if (subpass->mrtss.multisampledRenderToSingleSampledEnable) - __vk_append_struct(&data->rendering, (void *)&subpass->mrtss); + if (subpass->mrtss_samples != 0) { + data->mrtss = (VkMultisampledRenderToSingleSampledInfoEXT) { + .sType = VK_STRUCTURE_TYPE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT, + .multisampledRenderToSingleSampledEnable = true, + .rasterizationSamples = subpass->mrtss_samples, + }; + __vk_append_struct(&data->rendering, (void *)&data->mrtss); + } return &data->rendering; } @@ -1969,7 +1965,7 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, color_attachment_flags->flags = vk_attachment_description_flags_to_rendering_flags(resolve_att->flags); - } else if (subpass->mrtss.multisampledRenderToSingleSampledEnable && + } else if (subpass->mrtss_samples != 0 && rp_att->samples == VK_SAMPLE_COUNT_1_BIT) { if (vk_format_is_int(att_state->image_view->format)) color_attachment->resolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; @@ -2121,7 +2117,7 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, } if (sp_att->resolve != NULL || - (subpass->mrtss.multisampledRenderToSingleSampledEnable && + (subpass->mrtss_samples != 0 && rp_att->samples == VK_SAMPLE_COUNT_1_BIT)) { const struct vk_subpass_attachment *res_sp_att = sp_att->resolve ? sp_att->resolve : sp_att; assert(res_sp_att->attachment < pass->attachment_count); @@ -2445,8 +2441,15 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, /* Append this one last because it lives in the subpass and we don't want * to be changed by appending other structures later. 
*/ - if (subpass->mrtss.multisampledRenderToSingleSampledEnable) - __vk_append_struct(&rendering, (void *)&subpass->mrtss); + VkMultisampledRenderToSingleSampledInfoEXT mrtss; + if (subpass->mrtss_samples != 0) { + mrtss = (VkMultisampledRenderToSingleSampledInfoEXT) { + .sType = VK_STRUCTURE_TYPE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT, + .multisampledRenderToSingleSampledEnable = true, + .rasterizationSamples = subpass->mrtss_samples, + }; + __vk_append_struct(&rendering, (void *)&mrtss); + } disp->CmdBeginRendering(vk_command_buffer_to_handle(cmd_buffer), &rendering); diff --git a/src/vulkan/runtime/vk_render_pass.h b/src/vulkan/runtime/vk_render_pass.h index 937ed1c1434..92ba8b3164c 100644 --- a/src/vulkan/runtime/vk_render_pass.h +++ b/src/vulkan/runtime/vk_render_pass.h @@ -120,6 +120,8 @@ struct vk_subpass { */ uint32_t view_mask; + VkSampleCountFlagBits mrtss_samples; + bool has_external_src_dependency; bool has_external_dst_dependency; @@ -174,9 +176,6 @@ struct vk_subpass { */ VkCommandBufferInheritanceRenderingInfo inheritance_info; - /** VkMultisampledRenderToSingleSampledInfoEXT for this subpass */ - VkMultisampledRenderToSingleSampledInfoEXT mrtss; - /** True if legacy dithering is enabled for this subpass. 
*/ bool legacy_dithering_enabled; }; @@ -396,6 +395,7 @@ vk_get_rendering_attachment_flags(const VkRenderingAttachmentInfo *att); struct vk_gcbiarr_data { VkRenderingInfo rendering; VkRenderingFragmentShadingRateAttachmentInfoKHR fsr_att; + VkMultisampledRenderToSingleSampledInfoEXT mrtss; VkRenderingAttachmentInfo attachments[]; }; From a0f9315f9a6bef650f519e0e376bde58d862ed09 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Tue, 9 Dec 2025 14:54:39 -0500 Subject: [PATCH 05/41] vulkan/render_pass: Ignore DS resolve if we have no DS attachment --- src/vulkan/runtime/vk_render_pass.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vulkan/runtime/vk_render_pass.c b/src/vulkan/runtime/vk_render_pass.c index 5189d59d36e..41dc18569b0 100644 --- a/src/vulkan/runtime/vk_render_pass.c +++ b/src/vulkan/runtime/vk_render_pass.c @@ -564,7 +564,7 @@ vk_common_CreateRenderPass2(VkDevice _device, vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE); - if (ds_resolve) { + if (ds_resolve && subpass->depth_stencil_attachment) { if (ds_resolve->pDepthStencilResolveAttachment && ds_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) { subpass->depth_stencil_resolve_attachment = next_subpass_attachment++; From 1cd444a566308752974a6abfa7a79b8bb8a14394 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Tue, 9 Dec 2025 15:37:10 -0500 Subject: [PATCH 06/41] vulkan/render_pass: Sort out resolve modes at create time We have all the information up-front and sorting them out there makes things a bit more straightforward when it comes time to actually build the rendering info. Doubly so in the case of multisample render to single sample. 
--- src/vulkan/runtime/vk_render_pass.c | 171 +++++++++++++++++----------- src/vulkan/runtime/vk_render_pass.h | 9 +- 2 files changed, 110 insertions(+), 70 deletions(-) diff --git a/src/vulkan/runtime/vk_render_pass.c b/src/vulkan/runtime/vk_render_pass.c index 41dc18569b0..92b00e3b40d 100644 --- a/src/vulkan/runtime/vk_render_pass.c +++ b/src/vulkan/runtime/vk_render_pass.c @@ -426,6 +426,15 @@ vk_subpass_attachment_link_resolve(struct vk_subpass_attachment *att, att->resolve = resolve; } +static VkResolveModeFlagBits +resolve_mode_for_format(VkFormat format) +{ + if (vk_format_is_int(format)) + return VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; + else + return VK_RESOLVE_MODE_AVERAGE_BIT; +} + VKAPI_ATTR VkResult VKAPI_CALL vk_common_CreateRenderPass2(VkDevice _device, const VkRenderPassCreateInfo2 *pCreateInfo, @@ -549,6 +558,35 @@ vk_common_CreateRenderPass2(VkDevice _device, } } + for (uint32_t a = 0; a < desc->colorAttachmentCount; a++) { + struct vk_subpass_attachment *sp_att = &subpass->color_attachments[a]; + if (sp_att->attachment == VK_ATTACHMENT_UNUSED) + continue; + + assert(sp_att->attachment < pass->attachment_count); + const struct vk_render_pass_attachment *rp_att = + &pass->attachments[sp_att->attachment]; + + if (sp_att->resolve != NULL) { + assert(rp_att->samples > 1); + + assert(sp_att->resolve->attachment < pass->attachment_count); + const struct vk_render_pass_attachment *resolve_rp_att = + &pass->attachments[sp_att->resolve->attachment]; + + if (resolve_rp_att->has_external_format) { + sp_att->resolve_mode = + VK_RESOLVE_MODE_EXTERNAL_FORMAT_DOWNSAMPLE_BIT_ANDROID; + } else { + assert(resolve_rp_att->format == rp_att->format); + sp_att->resolve_mode = resolve_mode_for_format(rp_att->format); + } + } else if (subpass->mrtss_samples != 0 && + rp_att->samples == VK_SAMPLE_COUNT_1_BIT) { + sp_att->resolve_mode = resolve_mode_for_format(rp_att->format); + } + } + if (desc->pDepthStencilAttachment && desc->pDepthStencilAttachment->attachment != 
VK_ATTACHMENT_UNUSED) { subpass->depth_stencil_attachment = next_subpass_attachment++; @@ -578,8 +616,39 @@ vk_common_CreateRenderPass2(VkDevice _device, subpass->depth_stencil_resolve_attachment, pCreateInfo); } + + struct vk_subpass_attachment *sp_att = + subpass->depth_stencil_attachment; + assert(sp_att->attachment < pass->attachment_count); + const struct vk_render_pass_attachment *rp_att = + &pass->attachments[sp_att->attachment]; + + if (subpass->depth_stencil_resolve_attachment) + assert(rp_att->samples > 1); + + VkImageAspectFlags resolve_aspects = rp_att->aspects; + if (subpass->depth_stencil_resolve_attachment) { + struct vk_subpass_attachment *resolve_sp_att = + subpass->depth_stencil_resolve_attachment; + assert(resolve_sp_att->attachment < pass->attachment_count); + const struct vk_render_pass_attachment *resolve_rp_att = + &pass->attachments[resolve_sp_att->attachment]; + + /* From the Vulkan 1.3.204 spec: + * + * "VkSubpassDescriptionDepthStencilResolve::depthResolveMode is + * ignored if the VkFormat of the pDepthStencilResolveAttachment + * does not have a depth component. Similarly, + * VkSubpassDescriptionDepthStencilResolve::stencilResolveMode is + * ignored if the VkFormat of the pDepthStencilResolveAttachment + * does not have a stencil component." 
+ */ + resolve_aspects &= resolve_rp_att->aspects; + } + if (subpass->depth_stencil_resolve_attachment || - subpass->mrtss_samples != 0) { + (subpass->mrtss_samples != 0 && + rp_att->samples == VK_SAMPLE_COUNT_1_BIT)) { /* From the Vulkan 1.3.204 spec: * * VUID-VkSubpassDescriptionDepthStencilResolve-pDepthStencilResolveAttachment-03178 @@ -591,9 +660,19 @@ vk_common_CreateRenderPass2(VkDevice _device, assert(ds_resolve->depthResolveMode != VK_RESOLVE_MODE_NONE || ds_resolve->stencilResolveMode != VK_RESOLVE_MODE_NONE); - subpass->depth_resolve_mode = ds_resolve->depthResolveMode; - subpass->stencil_resolve_mode = ds_resolve->stencilResolveMode; + if (resolve_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) + sp_att->resolve_mode = ds_resolve->depthResolveMode; + if (resolve_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) + sp_att->stencil_resolve_mode = ds_resolve->stencilResolveMode; } + + /* Technically, I don't think this is supposed to happen but there's + * enough crazy around resolves and depth/stencil aspects that we may + * as well do one final check just to be sure. + */ + if (sp_att->resolve_mode == VK_RESOLVE_MODE_NONE && + sp_att->stencil_resolve_mode == VK_RESOLVE_MODE_NONE) + sp_att->resolve = NULL; } const VkFragmentShadingRateAttachmentInfoKHR *fsr_att_info = @@ -1939,6 +2018,11 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, color_attachment->storeOp = VK_ATTACHMENT_STORE_OP_STORE; } + /* With multisample render to single sample, we may have a resolve even + * if we don't have a resolve attachment. 
+ */ + color_attachment->resolveMode = sp_att->resolve_mode; + if (sp_att->resolve != NULL) { assert(sp_att->resolve->attachment < pass->attachment_count); struct vk_attachment_state *res_att_state = @@ -1952,12 +2036,6 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, const struct vk_render_pass_attachment *resolve_att = &pass->attachments[sp_att->resolve->attachment]; - if (resolve_att->has_external_format) - color_attachment->resolveMode = VK_RESOLVE_MODE_EXTERNAL_FORMAT_DOWNSAMPLE_BIT_ANDROID; - else if (vk_format_is_int(res_att_state->image_view->format)) - color_attachment->resolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; - else - color_attachment->resolveMode = VK_RESOLVE_MODE_AVERAGE_BIT; color_attachment->resolveImageView = vk_image_view_to_handle(res_att_state->image_view); @@ -1965,12 +2043,6 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, color_attachment_flags->flags = vk_attachment_description_flags_to_rendering_flags(resolve_att->flags); - } else if (subpass->mrtss_samples != 0 && - rp_att->samples == VK_SAMPLE_COUNT_1_BIT) { - if (vk_format_is_int(att_state->image_view->format)) - color_attachment->resolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; - else - color_attachment->resolveMode = VK_RESOLVE_MODE_AVERAGE_BIT; } } @@ -2116,65 +2188,36 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, att_state->views[view].sample_locations = sample_locations; } - if (sp_att->resolve != NULL || - (subpass->mrtss_samples != 0 && - rp_att->samples == VK_SAMPLE_COUNT_1_BIT)) { - const struct vk_subpass_attachment *res_sp_att = sp_att->resolve ? sp_att->resolve : sp_att; - assert(res_sp_att->attachment < pass->attachment_count); - const struct vk_render_pass_attachment *res_rp_att = - &pass->attachments[res_sp_att->attachment]; + /* With multisample render to single sample, we may have a resolve even + * if we don't have a resolve attachment. 
+ */ + depth_attachment.resolveMode = sp_att->resolve_mode; + stencil_attachment.resolveMode = sp_att->stencil_resolve_mode; + + if (sp_att->resolve != NULL) { + assert(sp_att->resolve->attachment < pass->attachment_count); struct vk_attachment_state *res_att_state = - &cmd_buffer->attachments[res_sp_att->attachment]; - - /* From the Vulkan 1.3.204 spec: - * - * "VkSubpassDescriptionDepthStencilResolve::depthResolveMode is - * ignored if the VkFormat of the pDepthStencilResolveAttachment - * does not have a depth component. Similarly, - * VkSubpassDescriptionDepthStencilResolve::stencilResolveMode is - * ignored if the VkFormat of the pDepthStencilResolveAttachment - * does not have a stencil component." - * - * TODO: Should we handle this here or when we create the render - * pass? Handling it here makes load ops "correct" in the sense - * that, if we resolve to the wrong aspect, we will still consider - * it bound and clear it if requested. - */ - VkResolveModeFlagBits depth_resolve_mode = VK_RESOLVE_MODE_NONE; - if (res_rp_att->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) - depth_resolve_mode = subpass->depth_resolve_mode; - - VkResolveModeFlagBits stencil_resolve_mode = VK_RESOLVE_MODE_NONE; - if (res_rp_att->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) - stencil_resolve_mode = subpass->stencil_resolve_mode; + &cmd_buffer->attachments[sp_att->resolve->attachment]; VkImageAspectFlags resolved_aspects = 0; - if (depth_resolve_mode != VK_RESOLVE_MODE_NONE) { - depth_attachment.resolveMode = depth_resolve_mode; - if (sp_att->resolve) { - depth_attachment.resolveImageView = - vk_image_view_to_handle(res_att_state->image_view); - depth_attachment.resolveImageLayout = - sp_att->resolve->layout; - } - + if (depth_attachment.resolveMode != VK_RESOLVE_MODE_NONE) { + depth_attachment.resolveImageView = + vk_image_view_to_handle(res_att_state->image_view); + depth_attachment.resolveImageLayout = + sp_att->resolve->layout; resolved_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; } - if 
(stencil_resolve_mode != VK_RESOLVE_MODE_NONE) { - stencil_attachment.resolveMode = stencil_resolve_mode; - if (sp_att->resolve) { - stencil_attachment.resolveImageView = - vk_image_view_to_handle(res_att_state->image_view); - stencil_attachment.resolveImageLayout = - sp_att->resolve->stencil_layout; - } - + if (stencil_attachment.resolveMode != VK_RESOLVE_MODE_NONE) { + stencil_attachment.resolveImageView = + vk_image_view_to_handle(res_att_state->image_view); + stencil_attachment.resolveImageLayout = + sp_att->resolve->stencil_layout; resolved_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; } - if (sp_att->resolve && resolved_aspects == rp_att->aspects) { + if (resolved_aspects == rp_att->aspects) { /* The resolve attachment is entirely overwritten by the * resolve operation so the load op really doesn't matter. * We can consider the resolve as being the load. diff --git a/src/vulkan/runtime/vk_render_pass.h b/src/vulkan/runtime/vk_render_pass.h index 92ba8b3164c..14264dde00f 100644 --- a/src/vulkan/runtime/vk_render_pass.h +++ b/src/vulkan/runtime/vk_render_pass.h @@ -75,6 +75,9 @@ struct vk_subpass_attachment { /** Resolve attachment, if any */ struct vk_subpass_attachment *resolve; + + VkResolveModeFlagBits resolve_mode; + VkResolveModeFlagBits stencil_resolve_mode; }; /***/ @@ -125,12 +128,6 @@ struct vk_subpass { bool has_external_src_dependency; bool has_external_dst_dependency; - /** VkSubpassDescriptionDepthStencilResolve::depthResolveMode */ - VkResolveModeFlagBits depth_resolve_mode; - - /** VkSubpassDescriptionDepthStencilResolve::stencilResolveMode */ - VkResolveModeFlagBits stencil_resolve_mode; - /** VkFragmentShadingRateAttachmentInfoKHR::shadingRateAttachmentTexelSize */ VkExtent2D fragment_shading_rate_attachment_texel_size; From 7fe0b2238ba8558ea304ce2daa1784b53f5ae59d Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 8 Dec 2025 21:38:09 -0500 Subject: [PATCH 07/41] vulkan/render_pass: Always use separate depth/stencil layouts There are no Vulkan 
drivers in Mesa that don't support this feature and it's a hard requirement for Vulkan 1.2 so there's no reason why we shouldn't also require it for the runtime render pass code. --- src/vulkan/runtime/vk_image.c | 38 +++++++++++++++++++++++++++++ src/vulkan/runtime/vk_image.h | 2 ++ src/vulkan/runtime/vk_render_pass.c | 30 +++++++++++++++++++++++ 3 files changed, 70 insertions(+) diff --git a/src/vulkan/runtime/vk_image.c b/src/vulkan/runtime/vk_image.c index e3a13845a1b..79cb8fbbc6f 100644 --- a/src/vulkan/runtime/vk_image.c +++ b/src/vulkan/runtime/vk_image.c @@ -778,6 +778,44 @@ vk_image_layout_is_depth_only(VkImageLayout layout) } } +VkImageLayout +vk_image_layout_depth_only(VkImageLayout layout) +{ + switch (layout) { + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL: + case VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL: + return VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL; + + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL: + case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL: + case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL: + return VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL; + + default: + return layout; + } +} + +VkImageLayout +vk_image_layout_stencil_only(VkImageLayout layout) +{ + switch (layout) { + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL: + case VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL: + return VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL; + + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL: + case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL: + case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL: + return VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL; + + default: + return layout; + } +} + static VkResult vk_image_create_get_format_list_uncompressed(struct vk_device *device, const VkImageCreateInfo *pCreateInfo, diff --git a/src/vulkan/runtime/vk_image.h b/src/vulkan/runtime/vk_image.h index 
92689e619e2..86c5be14b01 100644 --- a/src/vulkan/runtime/vk_image.h +++ b/src/vulkan/runtime/vk_image.h @@ -401,6 +401,8 @@ vk_image_view_subresource_range(const struct vk_image_view *view) bool vk_image_layout_is_read_only(VkImageLayout layout, VkImageAspectFlagBits aspect); bool vk_image_layout_is_depth_only(VkImageLayout layout); +VkImageLayout vk_image_layout_depth_only(VkImageLayout layout); +VkImageLayout vk_image_layout_stencil_only(VkImageLayout layout); VkImageUsageFlags vk_image_layout_to_usage_flags(VkImageLayout layout, VkImageAspectFlagBits aspect); diff --git a/src/vulkan/runtime/vk_render_pass.c b/src/vulkan/runtime/vk_render_pass.c index 92b00e3b40d..66e60350883 100644 --- a/src/vulkan/runtime/vk_render_pass.c +++ b/src/vulkan/runtime/vk_render_pass.c @@ -356,6 +356,25 @@ vk_render_pass_attachment_init(struct vk_render_pass_attachment *att, .has_external_format = vk_android_rp_attachment_has_external_format(desc), }; + + /* We require separate stencil layouts */ + if (att->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { + att->initial_layout = vk_image_layout_depth_only(att->initial_layout); + att->final_layout = vk_image_layout_depth_only(att->final_layout); + } else if (att->aspects == VK_IMAGE_ASPECT_STENCIL_BIT) { + att->initial_layout = VK_IMAGE_LAYOUT_UNDEFINED; + att->final_layout = VK_IMAGE_LAYOUT_UNDEFINED; + } + + if (att->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + att->initial_stencil_layout = + vk_image_layout_stencil_only(att->initial_stencil_layout); + att->final_stencil_layout = + vk_image_layout_stencil_only(att->final_stencil_layout); + } else { + assert(att->initial_stencil_layout == VK_IMAGE_LAYOUT_UNDEFINED); + assert(att->final_stencil_layout == VK_IMAGE_LAYOUT_UNDEFINED); + } } static void @@ -385,6 +404,17 @@ vk_subpass_attachment_init(struct vk_subpass_attachment *att, .stencil_layout = vk_att_ref_stencil_layout(ref, attachments), }; + /* We require separate stencil layouts */ + if (att->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) +
att->layout = vk_image_layout_depth_only(att->layout); + else if (att->aspects == VK_IMAGE_ASPECT_STENCIL_BIT) + att->layout = VK_IMAGE_LAYOUT_UNDEFINED; + + if (att->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) + att->stencil_layout = vk_image_layout_stencil_only(att->stencil_layout); + else + assert(att->stencil_layout == VK_IMAGE_LAYOUT_UNDEFINED); + switch (usage) { case VK_IMAGE_USAGE_TRANSFER_DST_BIT: break; /* No special aspect requirements */ From b615b3a3e90c95e73eb66a6750887abaaac416c3 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Wed, 10 Dec 2025 11:14:30 -0500 Subject: [PATCH 08/41] vulkan/render_pass: Stop passing useless attachments This is a little nicer for debugging --- src/vulkan/runtime/vk_render_pass.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/vulkan/runtime/vk_render_pass.c b/src/vulkan/runtime/vk_render_pass.c index 66e60350883..ef206f1c7e2 100644 --- a/src/vulkan/runtime/vk_render_pass.c +++ b/src/vulkan/runtime/vk_render_pass.c @@ -2446,9 +2446,11 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, .layerCount = pass->is_multiview ? 1 : framebuffer->layers, .viewMask = pass->is_multiview ? subpass->view_mask : 0, .colorAttachmentCount = subpass->color_count, - .pColorAttachments = color_attachments, - .pDepthAttachment = &depth_attachment, - .pStencilAttachment = &stencil_attachment, + .pColorAttachments = subpass->color_count > 0 ? color_attachments : NULL, + .pDepthAttachment = depth_attachment.imageView != VK_NULL_HANDLE ? + &depth_attachment : NULL, + .pStencilAttachment = stencil_attachment.imageView != VK_NULL_HANDLE ? + &stencil_attachment : NULL, }; if (subpass->legacy_dithering_enabled) From 664bea61a5d231baeadb89aabeb857080aaf835d Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Tue, 9 Dec 2025 16:16:42 -0500 Subject: [PATCH 09/41] vulkan/render_pass: Add a new vk_render helper struct This is what will be the core of subpass merging. 
It's a struct that describes a single render section, with the ability to create an empty one and selectively merge subpasses into it. This allows us to centralize all the logic around subpass merging into one place. --- src/vulkan/runtime/vk_render_pass.c | 470 ++++++++++++++++++++++++++++ 1 file changed, 470 insertions(+) diff --git a/src/vulkan/runtime/vk_render_pass.c b/src/vulkan/runtime/vk_render_pass.c index ef206f1c7e2..1e933f921cd 100644 --- a/src/vulkan/runtime/vk_render_pass.c +++ b/src/vulkan/runtime/vk_render_pass.c @@ -31,6 +31,7 @@ #include "vk_format.h" #include "vk_framebuffer.h" #include "vk_image.h" +#include "vk_physical_device.h" #include "vk_util.h" #include "vk_android.h" @@ -465,6 +466,475 @@ resolve_mode_for_format(VkFormat format) return VK_RESOLVE_MODE_AVERAGE_BIT; } +struct vk_render { + uint32_t view_mask; + VkSampleCountFlagBits mrtss_samples; + VkSampleCountFlagBits attachment_samples; + bool legacy_dithering_enabled; + + struct vk_subpass_attachment color_attachments[MESA_VK_MAX_COLOR_ATTACHMENTS]; + struct vk_subpass_attachment depth_stencil_attachment; + + VkExtent2D fragment_shading_rate_attachment_texel_size; + struct vk_subpass_attachment fragment_shading_rate_attachment; +}; + +static void +vk_render_init_empty(struct vk_render *render) +{ + *render = (struct vk_render) { + .depth_stencil_attachment = { + .attachment = VK_ATTACHMENT_UNUSED, + }, + .fragment_shading_rate_attachment = { + .attachment = VK_ATTACHMENT_UNUSED, + }, + }; + + for (uint32_t c = 0; c < MESA_VK_MAX_COLOR_ATTACHMENTS; c++) { + render->color_attachments[c] = (struct vk_subpass_attachment) { + .attachment = VK_ATTACHMENT_UNUSED, + }; + } +} + +#ifndef NDEBUG +static bool +vk_render_has_used_attachment(const struct vk_render *render) +{ + for (uint32_t c = 0; c < MESA_VK_MAX_COLOR_ATTACHMENTS; c++) { + if (render->color_attachments[c].attachment != VK_ATTACHMENT_UNUSED) + return true; + } + + if (render->depth_stencil_attachment.attachment != 
VK_ATTACHMENT_UNUSED) + return true; + + return false; +} +#endif + +static bool +vk_render_has_attachments(const struct vk_render *render) +{ + assert(vk_render_has_used_attachment(render) == + (render->attachment_samples != 0)); + return render->attachment_samples != 0; +} + +static bool +vk_render_is_empty(const struct vk_render *render) +{ + if (render->view_mask == 0) + assert(!vk_render_has_attachments(render)); + + return render->view_mask == 0; +} + +static uint32_t +vk_subpass_attachment_samples(const struct vk_render_pass *pass, + const struct vk_subpass_attachment *sp_att) +{ + if (sp_att->attachment == VK_ATTACHMENT_UNUSED) + return 0; + + assert(sp_att->attachment < pass->attachment_count); + return pass->attachments[sp_att->attachment].samples; +} + +static void +vk_render_init_for_single_subpass(struct vk_render *render, + const struct vk_render_pass *pass, + const struct vk_subpass *subpass) +{ + vk_render_init_empty(render); + + render->view_mask = subpass->view_mask; + + for (uint32_t a = 0; a < subpass->color_count; a++) { + const struct vk_subpass_attachment *sp_att = + &subpass->color_attachments[a]; + render->color_attachments[a] = *sp_att; + render->attachment_samples |= vk_subpass_attachment_samples(pass, sp_att); + } + + if (subpass->depth_stencil_attachment != NULL) { + const struct vk_subpass_attachment *sp_att = + subpass->depth_stencil_attachment; + render->depth_stencil_attachment = *sp_att; + render->attachment_samples |= vk_subpass_attachment_samples(pass, sp_att); + } + + if (subpass->fragment_shading_rate_attachment != NULL) { + render->fragment_shading_rate_attachment = + *subpass->fragment_shading_rate_attachment; + } +} + +static bool +vk_render_can_add_subpass(const struct vk_render *render, + const struct vk_render_pass *pass, + const struct vk_subpass *subpass) +{ + const struct vk_physical_device *device = pass->base.device->physical; + + /* We can always add to an empty render */ + if (vk_render_is_empty(render)) + return 
true; + + /* If the subpass has no attachments, then the sample count is determined + * based on dynamic state and/or pipelines. Merging two of those is + * probably safe if variableMultisampleRate is supported but there also + * doesn't seem to be much point in doing so since there's no tile memory + * to share anyway. We definitely can't merge one with attachments into + * one without. + */ + if (!vk_render_has_attachments(render) || + (subpass->color_count == 0 && subpass->depth_stencil_attachment == NULL)) + return false; + + /* View mask is per-render */ + if (render->view_mask != subpass->view_mask) + return false; + + /* Multisample render to single sample does a multisampled render and then + * immediately resolves the results, leaving no multisampling at the end of + * the subpass. If the previous subpass did MRTSS then it makes no sense + * to merge something into the end of it because that subpass would have + * access to the per-sample values, not the resolved values. + * + * However, if the sample counts otherwise match, then it's fine to have a + * regular multisampled subpass merged with a MRTSS subpass right after it.
+ */ + if (render->mrtss_samples != 0) + return false; + + if (subpass->mrtss_samples != 0 && + subpass->mrtss_samples != render->attachment_samples) + return false; + + if (subpass->legacy_dithering_enabled != render->legacy_dithering_enabled) + return false; + + uint32_t color_count = 0; + for (uint32_t c = 0; c < MESA_VK_MAX_COLOR_ATTACHMENTS; c++) { + if (render->color_attachments[c].attachment != VK_ATTACHMENT_UNUSED) + color_count++; + } + + for (uint32_t a = 0; a < subpass->color_count; a++) { + const struct vk_subpass_attachment *sp_att = + &subpass->color_attachments[a]; + if (sp_att->attachment == VK_ATTACHMENT_UNUSED) + continue; + + /* We could theoretically merge subpasses with different sample counts + * if we have VK_AMD_mixed_attachment_samples or + * VK_NV_framebuffer_mixed_samples but the rules are weird and hard to + * follow and those drivers aren't going to benefit much from subpass + * merging anyway. Just reject merges with differing sample counts. + */ + if (vk_subpass_attachment_samples(pass, sp_att) != + render->attachment_samples) + return false; + + bool found = false; + for (uint32_t c = 0; c < MESA_VK_MAX_COLOR_ATTACHMENTS; c++) { + const struct vk_subpass_attachment *r_att = + &render->color_attachments[c]; + if (r_att->attachment == sp_att->attachment) { + /* If a previous subpass resolves this attachment then we need to + * do that resolve BEFORE this subpass writes anything. We can't + * merge them. 
+ */ + if (r_att->resolve != NULL) + return false; + + found = true; + break; + } + } + + /* If we didn't find it, we'll need a new attachment */ + if (!found) + color_count++; + } + + if (color_count > device->properties.maxColorAttachments) + return false; + + if (subpass->depth_stencil_attachment != NULL) { + const struct vk_subpass_attachment *sp_att = + subpass->depth_stencil_attachment; + const struct vk_subpass_attachment *r_att = + &render->depth_stencil_attachment; + + /* We can only have one depth/stencil attachment bound and can't switch + * it mid-rendering. If there's a mismatch, we can't merge. + */ + assert(sp_att->attachment != VK_ATTACHMENT_UNUSED); + if (r_att->attachment != VK_ATTACHMENT_UNUSED && + r_att->attachment != sp_att->attachment) + return false; + + /* See the similar check for color above */ + if (vk_subpass_attachment_samples(pass, sp_att) != + render->attachment_samples) + return false; + + /* If a previous subpass resolves depth/stencil then we need to do that + * resolve BEFORE this subpass writes anything. We can't merge them + * unless this subpass treats the depth/stencil attachment as read-only. + */ + if (r_att->resolve != NULL) { + if ((sp_att->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && + !vk_image_layout_is_read_only(sp_att->layout, + VK_IMAGE_ASPECT_DEPTH_BIT)) + return false; + + if ((sp_att->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && + !vk_image_layout_is_read_only(sp_att->stencil_layout, + VK_IMAGE_ASPECT_STENCIL_BIT)) + return false; + + /* Or if we also have a resolve attachment. We can't resolve twice + * with a single CmdBegin/EndRendering().
+ */ + if (sp_att->resolve != NULL) + return false; + } + } + + if (subpass->fragment_shading_rate_attachment != NULL) { + const struct vk_subpass_attachment *sp_att = + subpass->fragment_shading_rate_attachment; + const struct vk_subpass_attachment *r_att = + &render->fragment_shading_rate_attachment; + + /* We can't use a fragment shading rate attachment that was written as a + * color attachment in a previous subpass. This shouldn't even really + * be possible given size and other constraints but it doesn't hurt to + * check. + */ + for (uint32_t c = 0; c < MESA_VK_MAX_COLOR_ATTACHMENTS; c++) { + if (render->color_attachments[c].attachment == sp_att->attachment) + return false; + } + + /* We can only have one fragment shading rate attachment bound and can't + * switch it mid-rendering. If there's a mismatch, we can't merge. + */ + assert(sp_att->attachment != VK_ATTACHMENT_UNUSED); + if (r_att->attachment != VK_ATTACHMENT_UNUSED && + r_att->attachment != sp_att->attachment) + return false; + + if (subpass->fragment_shading_rate_attachment_texel_size.width != + render->fragment_shading_rate_attachment_texel_size.width || + subpass->fragment_shading_rate_attachment_texel_size.height != + render->fragment_shading_rate_attachment_texel_size.height) + return false; + } + + return true; +} + +/* Earlier layouts win. We prefer read/write over read-only and + * GENERAL over specific layouts because, presumably, the client had a + * reason for specifying GENERAL.
+ */ +static const VkImageLayout color_layout_ranking[] = { + VK_IMAGE_LAYOUT_GENERAL, + VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_MAX_ENUM, +}; + +static const VkImageLayout depth_layout_ranking[] = { + VK_IMAGE_LAYOUT_GENERAL, + VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL, + VK_IMAGE_LAYOUT_MAX_ENUM, +}; + +static const VkImageLayout stencil_layout_ranking[] = { + VK_IMAGE_LAYOUT_GENERAL, + VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL, + VK_IMAGE_LAYOUT_MAX_ENUM, +}; + +static const VkImageLayout fsr_layout_ranking[] = { + VK_IMAGE_LAYOUT_GENERAL, + VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR, + VK_IMAGE_LAYOUT_MAX_ENUM, +}; + +static VkImageLayout +choose_best_layout(VkImageLayout a, VkImageLayout b, + const VkImageLayout *ranking) +{ + if (a == b) + return a; + + for (uint32_t i = 0; ranking[i] != VK_IMAGE_LAYOUT_MAX_ENUM; i++) { + if (ranking[i] == a) + return a; + if (ranking[i] == b) + return b; + } + + UNREACHABLE("Invalid layout for aspect"); + return VK_IMAGE_LAYOUT_GENERAL; +} + +static void +vk_subpass_attachment_merge(struct vk_subpass_attachment *r_att, + const struct vk_subpass_attachment *sp_att, + const VkImageLayout *layout_ranking) +{ + assert(r_att->aspects == sp_att->aspects); + r_att->usage |= sp_att->usage; + + r_att->layout = choose_best_layout(r_att->layout, sp_att->layout, + layout_ranking); + if (r_att->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + r_att->stencil_layout = choose_best_layout(r_att->stencil_layout, + sp_att->stencil_layout, + stencil_layout_ranking); + } else { + assert(r_att->stencil_layout == VK_IMAGE_LAYOUT_UNDEFINED); + } + + r_att->first_subpass |= sp_att->first_subpass; + r_att->last_subpass |= sp_att->last_subpass; + + assert(r_att->resolve_mode == VK_RESOLVE_MODE_NONE); + assert(r_att->stencil_resolve_mode == VK_RESOLVE_MODE_NONE); + r_att->resolve_mode = 
sp_att->resolve_mode; + r_att->stencil_resolve_mode = sp_att->stencil_resolve_mode; + + if (sp_att->resolve != NULL) { + assert(r_att->resolve == NULL); + r_att->resolve = sp_att->resolve; + } +} + +static void +vk_render_add_subpass(struct vk_render *render, + const struct vk_render_pass *pass, + const struct vk_subpass *subpass) +{ + assert(vk_render_can_add_subpass(render, pass, subpass)); + + if (render->view_mask == 0) + render->view_mask = subpass->view_mask; + else + assert(render->view_mask == subpass->view_mask); + + if (subpass->mrtss_samples != 0) { + assert(render->mrtss_samples == 0); + render->mrtss_samples = subpass->mrtss_samples; + } + + render->legacy_dithering_enabled |= subpass->legacy_dithering_enabled; + + for (uint32_t a = 0; a < subpass->color_count; a++) { + const struct vk_subpass_attachment *sp_att = + &subpass->color_attachments[a]; + if (sp_att->attachment == VK_ATTACHMENT_UNUSED) + continue; + + ASSERTED bool found_or_inserted = false; + for (uint32_t c = 0; c < MESA_VK_MAX_COLOR_ATTACHMENTS; c++) { + struct vk_subpass_attachment *r_att = &render->color_attachments[c]; + if (r_att->attachment == VK_ATTACHMENT_UNUSED) { + *r_att = *sp_att; + render->attachment_samples |= + vk_subpass_attachment_samples(pass, sp_att); + found_or_inserted = true; + break; + } else if (r_att->attachment == sp_att->attachment) { + vk_subpass_attachment_merge(r_att, sp_att, color_layout_ranking); + found_or_inserted = true; + break; + } + } + assert(found_or_inserted); + } + + if (subpass->depth_stencil_attachment != NULL) { + const struct vk_subpass_attachment *sp_att = + subpass->depth_stencil_attachment; + struct vk_subpass_attachment *r_att = + &render->depth_stencil_attachment; + + if (r_att->attachment == VK_ATTACHMENT_UNUSED) { + *r_att = *sp_att; + render->attachment_samples |= + vk_subpass_attachment_samples(pass, sp_att); + } else { + vk_subpass_attachment_merge(r_att, sp_att, depth_layout_ranking); + } + } + + if 
(subpass->fragment_shading_rate_attachment != NULL) { + const struct vk_subpass_attachment *sp_att = + subpass->fragment_shading_rate_attachment; + struct vk_subpass_attachment *r_att = + &render->fragment_shading_rate_attachment; + + if (r_att->attachment == VK_ATTACHMENT_UNUSED) { + render->fragment_shading_rate_attachment_texel_size = + subpass->fragment_shading_rate_attachment_texel_size; + *r_att = *sp_att; + } else { + assert(subpass->fragment_shading_rate_attachment_texel_size.width == + render->fragment_shading_rate_attachment_texel_size.width); + assert(subpass->fragment_shading_rate_attachment_texel_size.height == + render->fragment_shading_rate_attachment_texel_size.height); + vk_subpass_attachment_merge(r_att, sp_att, fsr_layout_ranking); + } + } + + /* We don't actually bind input attachments directly here. However, since + * we extend the lifetimes of subpass attachments, we need to take input + * attachments into account in the first/last_subpass flags so they + * accurately represent the entire range described by the render.
+ */ + for (uint32_t a = 0; a < subpass->input_count; a++) { + const struct vk_subpass_attachment *sp_att = + &subpass->input_attachments[a]; + if (sp_att->attachment == VK_ATTACHMENT_UNUSED) + continue; + + if (sp_att->first_subpass == 0 && sp_att->last_subpass == 0) + continue; + + struct vk_subpass_attachment *r_att; + for (uint32_t c = 0; c < MESA_VK_MAX_COLOR_ATTACHMENTS; c++) { + r_att = &render->color_attachments[c]; + if (r_att->attachment == sp_att->attachment) { + r_att->first_subpass |= sp_att->first_subpass; + r_att->last_subpass |= sp_att->last_subpass; + break; + } + } + + r_att = &render->depth_stencil_attachment; + if (r_att->attachment == sp_att->attachment) { + r_att->first_subpass |= sp_att->first_subpass; + r_att->last_subpass |= sp_att->last_subpass; + } + + r_att = &render->fragment_shading_rate_attachment; + if (r_att->attachment == sp_att->attachment) { + r_att->first_subpass |= sp_att->first_subpass; + r_att->last_subpass |= sp_att->last_subpass; + } + } +} + VKAPI_ATTR VkResult VKAPI_CALL vk_common_CreateRenderPass2(VkDevice _device, const VkRenderPassCreateInfo2 *pCreateInfo, From 2bbb1088053f14389b8ae8ba120a407bdbaa383d Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 8 Dec 2025 05:15:28 -0500 Subject: [PATCH 10/41] vulkan/render_pass: Pull pipeline info init into a helper This also changes it to be entirely based on the subpass and not API-level descriptors. 
--- src/vulkan/runtime/vk_render_pass.c | 245 +++++++++++++++------------- 1 file changed, 131 insertions(+), 114 deletions(-) diff --git a/src/vulkan/runtime/vk_render_pass.c b/src/vulkan/runtime/vk_render_pass.c index 1e933f921cd..48226250705 100644 --- a/src/vulkan/runtime/vk_render_pass.c +++ b/src/vulkan/runtime/vk_render_pass.c @@ -935,6 +935,136 @@ vk_render_add_subpass(struct vk_render *render, } } +static uint32_t +vk_subpass_get_input_attachment_index(const struct vk_subpass *subpass, + uint32_t attachment) +{ + uint32_t input_index = VK_ATTACHMENT_UNUSED; + for (uint32_t i = 0; i < subpass->input_count; i++) { + if (subpass->input_attachments[i].attachment == attachment) { + assert(input_index == VK_ATTACHMENT_UNUSED); + input_index = i; + } + } + return input_index; +} + +static void +vk_subpass_init_pipeline_infos(struct vk_render_pass *pass, + uint32_t subpass_idx) +{ + struct vk_subpass *subpass = &pass->subpasses[subpass_idx]; + + VkSampleCountFlagBits samples = 0; + for (uint32_t a = 0; a < subpass->color_count; a++) { + const struct vk_subpass_attachment *sp_att = + &subpass->color_attachments[a]; + + if (sp_att->attachment == VK_ATTACHMENT_UNUSED) { + subpass->ial.colors[a] = VK_ATTACHMENT_UNUSED; + subpass->color_attachment_formats[a] = VK_FORMAT_UNDEFINED; + subpass->sample_count_amd.samples[a] = VK_SAMPLE_COUNT_1_BIT; + } else { + assert(sp_att->attachment < pass->attachment_count); + const struct vk_render_pass_attachment *rp_att = + &pass->attachments[sp_att->attachment]; + + subpass->ial.colors[a] = + vk_subpass_get_input_attachment_index(subpass, sp_att->attachment); + + subpass->color_attachment_formats[a] = rp_att->format; + subpass->sample_count_amd.samples[a] = rp_att->samples; + + samples |= rp_att->samples; + } + } + + subpass->ial.depth = VK_ATTACHMENT_UNUSED; + subpass->ial.stencil = VK_ATTACHMENT_UNUSED; + + VkFormat depth_format = VK_FORMAT_UNDEFINED; + VkFormat stencil_format = VK_FORMAT_UNDEFINED; + VkSampleCountFlagBits 
depth_stencil_samples = VK_SAMPLE_COUNT_1_BIT; + if (subpass->depth_stencil_attachment != NULL) { + const struct vk_subpass_attachment *sp_att = + subpass->depth_stencil_attachment; + assert(sp_att->attachment < pass->attachment_count); + const struct vk_render_pass_attachment *rp_att = + &pass->attachments[sp_att->attachment]; + + const uint32_t ia_idx = + vk_subpass_get_input_attachment_index(subpass, sp_att->attachment); + + if (rp_att->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { + depth_format = rp_att->format; + subpass->ial.depth = ia_idx; + } + + if (rp_att->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + stencil_format = rp_att->format; + subpass->ial.stencil = ia_idx; + } + + depth_stencil_samples = rp_att->samples; + + samples |= rp_att->samples; + } + + subpass->sample_count_amd.info = (VkAttachmentSampleCountInfoAMD) { + .sType = VK_STRUCTURE_TYPE_ATTACHMENT_SAMPLE_COUNT_INFO_AMD, + .pNext = NULL, + .colorAttachmentCount = subpass->color_count, + .pColorAttachmentSamples = subpass->sample_count_amd.samples, + .depthStencilAttachmentSamples = depth_stencil_samples, + }; + + subpass->ial.info = (VkRenderingInputAttachmentIndexInfo) { + .sType = VK_STRUCTURE_TYPE_RENDERING_INPUT_ATTACHMENT_INDEX_INFO, + .pNext = &subpass->sample_count_amd.info, + .colorAttachmentCount = subpass->color_count, + .pColorAttachmentInputIndices = subpass->ial.colors, + /* From the Vulkan 1.3.204 spec: + * + * VUID-vkCmdDraw-OpTypeImage-07468 + * + * "If any shader executed by this pipeline accesses an OpTypeImage + * variable with a Dim operand of SubpassData, it must be decorated + * with an InputAttachmentIndex that corresponds to a valid input + * attachment in the current subpass." + * + * So we don't have to worry about the missing InputAttachmentIndex + * decoration (AKA NO_INDEX) here, the depth/stencil attachment is + * either not used as an input attachment, or it has an explicit + * index. 
+ */ + .pDepthInputAttachmentIndex = &subpass->ial.depth, + .pStencilInputAttachmentIndex = &subpass->ial.stencil, + }; + + subpass->pipeline_info = (VkPipelineRenderingCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO, + .pNext = &subpass->ial.info, + .viewMask = pass->is_multiview ? subpass->view_mask : 0, + .colorAttachmentCount = subpass->color_count, + .pColorAttachmentFormats = subpass->color_attachment_formats, + .depthAttachmentFormat = depth_format, + .stencilAttachmentFormat = stencil_format, + }; + + subpass->inheritance_info = (VkCommandBufferInheritanceRenderingInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_RENDERING_INFO, + .pNext = &subpass->ial.info, + /* If we're inheriting, the contents are clearly in secondaries */ + .flags = VK_RENDERING_CONTENTS_SECONDARY_COMMAND_BUFFERS_BIT, + .viewMask = pass->is_multiview ? subpass->view_mask : 0, + .colorAttachmentCount = subpass->color_count, + .pColorAttachmentFormats = subpass->color_attachment_formats, + .depthAttachmentFormat = depth_format, + .stencilAttachmentFormat = stencil_format, + .rasterizationSamples = samples, + }; +} + VKAPI_ATTR VkResult VKAPI_CALL vk_common_CreateRenderPass2(VkDevice _device, const VkRenderPassCreateInfo2 *pCreateInfo, @@ -1235,120 +1365,7 @@ vk_common_CreateRenderPass2(VkDevice _device, } } - VkSampleCountFlagBits samples = 0; - if (desc->colorAttachmentCount > 0) { - for (uint32_t a = 0; a < desc->colorAttachmentCount; a++) { - const VkAttachmentReference2 *ref = &desc->pColorAttachments[a]; - if (ref->attachment >= pCreateInfo->attachmentCount) { - subpass->ial.colors[a] = VK_ATTACHMENT_UNUSED; - subpass->color_attachment_formats[a] = VK_FORMAT_UNDEFINED; - subpass->sample_count_amd.samples[a] = VK_SAMPLE_COUNT_1_BIT; - } else { - const VkAttachmentDescription2 *att = - &pCreateInfo->pAttachments[ref->attachment]; - - for (uint32_t j = 0; j < subpass->input_count; j++) { - if (subpass->input_attachments[j].attachment == - 
subpass->color_attachments[a].attachment) { - subpass->ial.colors[a] = j; - } - } - - subpass->color_attachment_formats[a] = att->format; - subpass->sample_count_amd.samples[a] = att->samples; - - samples |= att->samples; - } - } - } - - subpass->ial.depth = VK_ATTACHMENT_UNUSED; - subpass->ial.stencil = VK_ATTACHMENT_UNUSED; - - VkFormat depth_format = VK_FORMAT_UNDEFINED; - VkFormat stencil_format = VK_FORMAT_UNDEFINED; - VkSampleCountFlagBits depth_stencil_samples = VK_SAMPLE_COUNT_1_BIT; - if (desc->pDepthStencilAttachment != NULL) { - const VkAttachmentReference2 *ref = desc->pDepthStencilAttachment; - if (ref->attachment < pCreateInfo->attachmentCount) { - const VkAttachmentDescription2 *att = - &pCreateInfo->pAttachments[ref->attachment]; - uint32_t ia_idx = VK_ATTACHMENT_UNUSED; - - for (uint32_t j = 0; j < subpass->input_count; j++) { - if (subpass->input_attachments[j].attachment == ref->attachment) - ia_idx = j; - } - - if (vk_format_has_depth(att->format)) { - depth_format = att->format; - subpass->ial.depth = ia_idx; - } - - if (vk_format_has_stencil(att->format)) { - stencil_format = att->format; - subpass->ial.stencil = ia_idx; - } - - depth_stencil_samples = att->samples; - - samples |= att->samples; - } - } - - subpass->sample_count_amd.info = (VkAttachmentSampleCountInfoAMD) { - .sType = VK_STRUCTURE_TYPE_ATTACHMENT_SAMPLE_COUNT_INFO_AMD, - .pNext = NULL, - .colorAttachmentCount = desc->colorAttachmentCount, - .pColorAttachmentSamples = subpass->sample_count_amd.samples, - .depthStencilAttachmentSamples = depth_stencil_samples, - }; - - subpass->ial.info = (VkRenderingInputAttachmentIndexInfo) { - .sType = VK_STRUCTURE_TYPE_RENDERING_INPUT_ATTACHMENT_INDEX_INFO, - .pNext = &subpass->sample_count_amd.info, - .colorAttachmentCount = subpass->color_count, - .pColorAttachmentInputIndices = subpass->ial.colors, - /* From the Vulkan 1.3.204 spec: - * - * VUID-vkCmdDraw-OpTypeImage-07468 - * - * "If any shader executed by this pipeline accesses an 
OpTypeImage - * variable with a Dim operand of SubpassData, it must be decorated - * with an InputAttachmentIndex that corresponds to a valid input - * attachment in the current subpass." - * - * So we don't have to worry about the missing InputAttachmentIndex - * decoration (AKA NO_INDEX) here, the depth/stencil attachment is - * either not used as an input attachment, or it has an explicit - * index. - */ - .pDepthInputAttachmentIndex = &subpass->ial.depth, - .pStencilInputAttachmentIndex = &subpass->ial.stencil, - }; - - subpass->pipeline_info = (VkPipelineRenderingCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO, - .pNext = &subpass->ial.info, - .viewMask = desc->viewMask, - .colorAttachmentCount = desc->colorAttachmentCount, - .pColorAttachmentFormats = subpass->color_attachment_formats, - .depthAttachmentFormat = depth_format, - .stencilAttachmentFormat = stencil_format, - }; - - subpass->inheritance_info = (VkCommandBufferInheritanceRenderingInfo) { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_RENDERING_INFO, - .pNext = &subpass->ial.info, - /* If we're inheriting, the contents are clearly in secondaries */ - .flags = VK_RENDERING_CONTENTS_SECONDARY_COMMAND_BUFFERS_BIT, - .viewMask = desc->viewMask, - .colorAttachmentCount = desc->colorAttachmentCount, - .pColorAttachmentFormats = subpass->color_attachment_formats, - .depthAttachmentFormat = depth_format, - .stencilAttachmentFormat = stencil_format, - .rasterizationSamples = samples, - }; + vk_subpass_init_pipeline_infos(pass, s); } assert(next_subpass_attachment == subpass_attachments + subpass_attachment_count); From d29bd070d6d388b51c33896df72b308f69ce7b1a Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 8 Dec 2025 12:16:42 -0500 Subject: [PATCH 11/41] vulkan/render_pass: Initialize pipeline info relative to a vk_render For now, this should be a no-op because the vk_render always represents a single subpass. 
However, this will eventually allow us to compute or re-compute pipeline info relative to a merged render. --- src/vulkan/runtime/vk_render_pass.c | 76 ++++++++++++++++------------- 1 file changed, 41 insertions(+), 35 deletions(-) diff --git a/src/vulkan/runtime/vk_render_pass.c b/src/vulkan/runtime/vk_render_pass.c index 48226250705..641020de907 100644 --- a/src/vulkan/runtime/vk_render_pass.c +++ b/src/vulkan/runtime/vk_render_pass.c @@ -951,32 +951,38 @@ vk_subpass_get_input_attachment_index(const struct vk_subpass *subpass, static void vk_subpass_init_pipeline_infos(struct vk_render_pass *pass, - uint32_t subpass_idx) + uint32_t subpass_idx, + const struct vk_render *render) { struct vk_subpass *subpass = &pass->subpasses[subpass_idx]; - VkSampleCountFlagBits samples = 0; - for (uint32_t a = 0; a < subpass->color_count; a++) { - const struct vk_subpass_attachment *sp_att = - &subpass->color_attachments[a]; + STATIC_ASSERT(VK_ATTACHMENT_UNUSED == (uint32_t)-1); + memset(subpass->ial.colors, -1, sizeof(subpass->ial.colors)); - if (sp_att->attachment == VK_ATTACHMENT_UNUSED) { - subpass->ial.colors[a] = VK_ATTACHMENT_UNUSED; - subpass->color_attachment_formats[a] = VK_FORMAT_UNDEFINED; - subpass->sample_count_amd.samples[a] = VK_SAMPLE_COUNT_1_BIT; - } else { - assert(sp_att->attachment < pass->attachment_count); - const struct vk_render_pass_attachment *rp_att = - &pass->attachments[sp_att->attachment]; + STATIC_ASSERT(VK_FORMAT_UNDEFINED == 0); + memset(subpass->color_attachment_formats, 0, + sizeof(subpass->color_attachment_formats)); - subpass->ial.colors[a] = - vk_subpass_get_input_attachment_index(subpass, sp_att->attachment); + memset(subpass->sample_count_amd.samples, 0, + sizeof(subpass->sample_count_amd.samples)); - subpass->color_attachment_formats[a] = rp_att->format; - subpass->sample_count_amd.samples[a] = rp_att->samples; + uint32_t color_count = 0; + for (uint32_t c = 0; c < MESA_VK_MAX_COLOR_ATTACHMENTS; c++) { + const struct 
vk_subpass_attachment *r_att = &render->color_attachments[c]; + if (r_att->attachment == VK_ATTACHMENT_UNUSED) + continue; - samples |= rp_att->samples; - } + assert(r_att->attachment < pass->attachment_count); + const struct vk_render_pass_attachment *rp_att = + &pass->attachments[r_att->attachment]; + + color_count = MAX2(color_count, c + 1); + + subpass->ial.colors[c] = + vk_subpass_get_input_attachment_index(subpass, r_att->attachment); + + subpass->color_attachment_formats[c] = rp_att->format; + subpass->sample_count_amd.samples[c] = rp_att->samples; } subpass->ial.depth = VK_ATTACHMENT_UNUSED; @@ -985,15 +991,15 @@ vk_subpass_init_pipeline_infos(struct vk_render_pass *pass, VkFormat depth_format = VK_FORMAT_UNDEFINED; VkFormat stencil_format = VK_FORMAT_UNDEFINED; VkSampleCountFlagBits depth_stencil_samples = VK_SAMPLE_COUNT_1_BIT; - if (subpass->depth_stencil_attachment != NULL) { - const struct vk_subpass_attachment *sp_att = - subpass->depth_stencil_attachment; - assert(sp_att->attachment < pass->attachment_count); + if (render->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) { + const struct vk_subpass_attachment *r_att = + &render->depth_stencil_attachment; + assert(r_att->attachment < pass->attachment_count); const struct vk_render_pass_attachment *rp_att = - &pass->attachments[sp_att->attachment]; + &pass->attachments[r_att->attachment]; const uint32_t ia_idx = - vk_subpass_get_input_attachment_index(subpass, sp_att->attachment); + vk_subpass_get_input_attachment_index(subpass, r_att->attachment); if (rp_att->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { depth_format = rp_att->format; @@ -1006,14 +1012,12 @@ vk_subpass_init_pipeline_infos(struct vk_render_pass *pass, } depth_stencil_samples = rp_att->samples; - - samples |= rp_att->samples; } subpass->sample_count_amd.info = (VkAttachmentSampleCountInfoAMD) { .sType = VK_STRUCTURE_TYPE_ATTACHMENT_SAMPLE_COUNT_INFO_AMD, .pNext = NULL, - .colorAttachmentCount = subpass->color_count, + 
.colorAttachmentCount = color_count, .pColorAttachmentSamples = subpass->sample_count_amd.samples, .depthStencilAttachmentSamples = depth_stencil_samples, }; @@ -1021,7 +1025,7 @@ vk_subpass_init_pipeline_infos(struct vk_render_pass *pass, subpass->ial.info = (VkRenderingInputAttachmentIndexInfo) { .sType = VK_STRUCTURE_TYPE_RENDERING_INPUT_ATTACHMENT_INDEX_INFO, .pNext = &subpass->sample_count_amd.info, - .colorAttachmentCount = subpass->color_count, + .colorAttachmentCount = color_count, .pColorAttachmentInputIndices = subpass->ial.colors, /* From the Vulkan 1.3.204 spec: * @@ -1044,8 +1048,8 @@ vk_subpass_init_pipeline_infos(struct vk_render_pass *pass, subpass->pipeline_info = (VkPipelineRenderingCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO, .pNext = &subpass->ial.info, - .viewMask = pass->is_multiview ? subpass->view_mask : 0, - .colorAttachmentCount = subpass->color_count, + .viewMask = pass->is_multiview ? render->view_mask : 0, + .colorAttachmentCount = color_count, .pColorAttachmentFormats = subpass->color_attachment_formats, .depthAttachmentFormat = depth_format, .stencilAttachmentFormat = stencil_format, @@ -1056,12 +1060,12 @@ vk_subpass_init_pipeline_infos(struct vk_render_pass *pass, .pNext = &subpass->ial.info, /* If we're inheriting, the contents are clearly in secondaries */ .flags = VK_RENDERING_CONTENTS_SECONDARY_COMMAND_BUFFERS_BIT, - .viewMask = pass->is_multiview ? subpass->view_mask : 0, - .colorAttachmentCount = subpass->color_count, + .viewMask = pass->is_multiview ? 
render->view_mask : 0, + .colorAttachmentCount = color_count, .pColorAttachmentFormats = subpass->color_attachment_formats, .depthAttachmentFormat = depth_format, .stencilAttachmentFormat = stencil_format, - .rasterizationSamples = samples, + .rasterizationSamples = render->attachment_samples, }; } @@ -1365,7 +1369,9 @@ vk_common_CreateRenderPass2(VkDevice _device, } } - vk_subpass_init_pipeline_infos(pass, s); + struct vk_render render; + vk_render_init_for_single_subpass(&render, pass, subpass); + vk_subpass_init_pipeline_infos(pass, s, &render); } assert(next_subpass_attachment == subpass_attachments + subpass_attachment_count); From e4e9c8c9c5efb04dac608561bfd98c70e25e0001 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Tue, 9 Dec 2025 17:07:17 -0500 Subject: [PATCH 12/41] vulkan/render_pass: Stop passing pNexts to CmdSetRenderingInputAttachmentIndices() Nothing looks at the pNext currently but it's probably still best not to have a bunch of pointless stuff chained in there. --- src/vulkan/runtime/vk_render_pass.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/vulkan/runtime/vk_render_pass.c b/src/vulkan/runtime/vk_render_pass.c index 641020de907..6a8b9869a42 100644 --- a/src/vulkan/runtime/vk_render_pass.c +++ b/src/vulkan/runtime/vk_render_pass.c @@ -3047,8 +3047,10 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, * So I'm not sure this CmdSetRenderingInputAttachmentIndices() is * really needed, but let's keep it to play by the rules. 
*/ + VkRenderingInputAttachmentIndexInfo ial_tmp = subpass->ial.info; + ial_tmp.pNext = NULL; disp->CmdSetRenderingInputAttachmentIndices(vk_command_buffer_to_handle(cmd_buffer), - &subpass->ial.info); + &ial_tmp); } STACK_ARRAY_FINISH(color_attachments); From de95b490cbf88b578f9c6723235fad6d2a275b73 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Thu, 11 Dec 2025 16:17:39 -0500 Subject: [PATCH 13/41] vulkan: Add a new pseudo-extension for remapping attachments Even though this one looks a lot like the color attachment locations part of KHR_dynamic_rendering_local_read, it's different in that it remaps EVERYTHING, not just shader locations. In order to implement this extension, the driver has to be able to remap shader locations, blend constants, color write enables, clears. Everything. In exchange for this, the driver gets this new struct chained in everywhere so it always has this information any place where it would have a Vulkan 1.0 render pass. It's available in secondaries, at pipeline compile time, etc. Also, the two extensions will never be used at the same time so the driver doesn't need to worry too much about that, as long as both work. The other addition this extension makes is a depth/stencil attachment enable which is separate from the standard dynamic depth/stencil state. This is used to allow the render pass code to disable depth testing in subpasses which don't have a depth buffer while leaving the depth buffer bound from a previous subpass.
--- src/vulkan/util/vk_internal_exts.h | 39 ++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/src/vulkan/util/vk_internal_exts.h b/src/vulkan/util/vk_internal_exts.h index 497d28bd40b..2d665bb809a 100644 --- a/src/vulkan/util/vk_internal_exts.h +++ b/src/vulkan/util/vk_internal_exts.h @@ -15,6 +15,8 @@ #ifndef VK_INTERNAL_EXTS_H #define VK_INTERNAL_EXTS_H +#include "vulkan/runtime/vk_limits.h" + #include #include @@ -123,6 +125,43 @@ typedef struct VkRenderingAttachmentInitialLayoutInfoMESA { #define VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INITIAL_LAYOUT_INFO_MESA_cast \ VkRenderingAttachmentInitialLayoutInfoMESA +/* Can extend any of the following: + * + * - VkGraphicsPipelineCreateInfo + * - VkRenderingInfo + * - VkCommandBufferInheritanceRenderingInfo + * + * When chained into VkRenderingInfo, the driver must first begin the render, + * including any attachment loads, and then set the remap. The remap does not + * apply to the VkRenderingInfo itself. + */ +typedef struct VkRenderingAttachmentRemapInfoMESA { + VkStructureType sType; + const void* pNext; + + /** A mapping from attachments (as per the vkCmdBeginRendering() numbering) + * to logical attachments used by other Vulkan commands such as + * CmdClearAttachments() or CmdSetColorBlendEquationEXT(). + * + * Unlike VkRenderingAttachmentLocationInfo, this applies to all Vulkan + * commands and structs other than CmdBeginRendering() and + * VkCommandBufferInheritanceRenderingInfo, into which it can be chained. + */ + uint32_t colorAttachmentRemap[MESA_VK_MAX_COLOR_ATTACHMENTS]; + + /** True if the depth/stencil attachment should be enabled. + * + * If false, the driver will behave as if the depth attachment is not + * present, even though it may still be bound. This implies disabling the + * depth and stencil tests as well as depth writes.
+ */ + VkBool32 depthStencilAttachmentEnable; +} VkRenderingAttachmentRemapInfoMESA; + +#define VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_REMAP_INFO_MESA \ + (VkStructureType)1000044902 +#define VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_REMAP_INFO_MESA_cast \ + VkRenderingAttachmentRemapInfoMESA struct nir_shader; From 96f56a89975884e50ecd5f93b38f7be3f4160628 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Thu, 11 Dec 2025 16:12:28 -0500 Subject: [PATCH 14/41] vulkan/render_pass: Populate VkRenderingAttachmentRemapInfoMESA For now, this will just be an identity mapping as there's nothing really to remap. However, we need this plumbing before we can hook it up in vk_graphics_state.c. --- src/vulkan/runtime/vk_render_pass.c | 43 ++++++++++++++++++++++++++++- src/vulkan/runtime/vk_render_pass.h | 20 ++++++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/src/vulkan/runtime/vk_render_pass.c b/src/vulkan/runtime/vk_render_pass.c index 6a8b9869a42..0d63e6ac55e 100644 --- a/src/vulkan/runtime/vk_render_pass.c +++ b/src/vulkan/runtime/vk_render_pass.c @@ -935,6 +935,20 @@ vk_render_add_subpass(struct vk_render *render, } } +static uint32_t +vk_subpass_get_color_attachment_index(const struct vk_subpass *subpass, + uint32_t attachment) +{ + uint32_t color_index = VK_ATTACHMENT_UNUSED; + for (uint32_t i = 0; i < subpass->color_count; i++) { + if (subpass->color_attachments[i].attachment == attachment) { + assert(color_index == VK_ATTACHMENT_UNUSED); + color_index = i; + } + } + return color_index; +} + static uint32_t vk_subpass_get_input_attachment_index(const struct vk_subpass *subpass, uint32_t attachment) @@ -955,9 +969,11 @@ vk_subpass_init_pipeline_infos(struct vk_render_pass *pass, const struct vk_render *render) { struct vk_subpass *subpass = &pass->subpasses[subpass_idx]; + uint32_t rar_colors[MESA_VK_MAX_COLOR_ATTACHMENTS]; STATIC_ASSERT(VK_ATTACHMENT_UNUSED == (uint32_t)-1); memset(subpass->ial.colors, -1, sizeof(subpass->ial.colors)); +
memset(rar_colors, -1, sizeof(rar_colors)); STATIC_ASSERT(VK_FORMAT_UNDEFINED == 0); memset(subpass->color_attachment_formats, 0, @@ -978,6 +994,8 @@ vk_subpass_init_pipeline_infos(struct vk_render_pass *pass, color_count = MAX2(color_count, c + 1); + rar_colors[c] = + vk_subpass_get_color_attachment_index(subpass, r_att->attachment); subpass->ial.colors[c] = vk_subpass_get_input_attachment_index(subpass, r_att->attachment); @@ -1022,9 +1040,20 @@ vk_subpass_init_pipeline_infos(struct vk_render_pass *pass, .depthStencilAttachmentSamples = depth_stencil_samples, }; + subpass->rar_info = (VkRenderingAttachmentRemapInfoMESA) { + .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_REMAP_INFO_MESA, + .pNext = &subpass->sample_count_amd.info, + .depthStencilAttachmentEnable = + subpass->depth_stencil_attachment != NULL, + }; + STATIC_ASSERT(sizeof(subpass->rar_info.colorAttachmentRemap) == + sizeof(rar_colors)); + memcpy(subpass->rar_info.colorAttachmentRemap, + rar_colors, sizeof(rar_colors)); + subpass->ial.info = (VkRenderingInputAttachmentIndexInfo) { .sType = VK_STRUCTURE_TYPE_RENDERING_INPUT_ATTACHMENT_INDEX_INFO, - .pNext = &subpass->sample_count_amd.info, + .pNext = &subpass->rar_info, .colorAttachmentCount = color_count, .pColorAttachmentInputIndices = subpass->ial.colors, /* From the Vulkan 1.3.204 spec: @@ -1527,6 +1556,18 @@ vk_get_pipeline_rendering_create_info(const VkGraphicsPipelineCreateInfo *info) return vk_find_struct_const(info->pNext, PIPELINE_RENDERING_CREATE_INFO); } +const VkRenderingAttachmentRemapInfoMESA * +vk_get_pipeline_rendering_ar_info(const VkGraphicsPipelineCreateInfo *info) +{ + VK_FROM_HANDLE(vk_render_pass, render_pass, info->renderPass); + if (render_pass != NULL) { + assert(info->subpass < render_pass->subpass_count); + return &render_pass->subpasses[info->subpass].rar_info; + } + + return NULL; +} + const VkRenderingInputAttachmentIndexInfo * vk_get_pipeline_rendering_ial_info(const VkGraphicsPipelineCreateInfo *info) { diff --git 
a/src/vulkan/runtime/vk_render_pass.h b/src/vulkan/runtime/vk_render_pass.h index 14264dde00f..1a912b3dd85 100644 --- a/src/vulkan/runtime/vk_render_pass.h +++ b/src/vulkan/runtime/vk_render_pass.h @@ -143,6 +143,12 @@ struct vk_subpass { VkSampleCountFlagBits samples[MESA_VK_MAX_COLOR_ATTACHMENTS]; } sample_count_amd; + /** VkRenderingAttachmentRemapInfoMESA for this subpass + * + * This is in the pNext chain of pipeline_info and inheritance_info. + */ + VkRenderingAttachmentRemapInfoMESA rar_info; + /** VkRenderingInputAttachmentIndexInfo for this subpass * * This is in the pNext chain of pipeline_info and inheritance_info. @@ -315,6 +321,20 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(vk_render_pass, base, VkRenderPass, const VkPipelineRenderingCreateInfo * vk_get_pipeline_rendering_create_info(const VkGraphicsPipelineCreateInfo *info); +/** Returns the VkRenderingAttachmentRemapInfoMESA for a graphics pipeline + * + * For render-pass-free drivers, this can be used in the implementation of + * vkCreateGraphicsPipelines to get the VkRenderingAttachmentRemapInfoMESA. + * If VkGraphicsPipelineCreateInfo::renderPass is not VK_NULL_HANDLE, it will + * return the VkRenderingAttachmentRemapInfoMESA for the specified subpass. + * If VkGraphicsPipelineCreateInfo::renderPass is VK_NULL_HANDLE, it will + * return NULL. + * + * :param info: |in| One of the pCreateInfos from vkCreateGraphicsPipelines + */ +const VkRenderingAttachmentRemapInfoMESA * +vk_get_pipeline_rendering_ar_info(const VkGraphicsPipelineCreateInfo *info); + /** Returns the VkRenderingInputAttachmentIndexInfo for a graphics pipeline * * For render-pass-free drivers, this can be used in the implementation of From ada809f09750d9920e0eed77a46b87aad7a869f9 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 8 Dec 2025 04:24:38 -0500 Subject: [PATCH 15/41] vulkan/graphics_state: Support VkRenderingAttachmentRemapInfoMESA This plumbs the new remap information through as both pipeline and dynamic state.
There is no Vulkan entrypoint to set the dynamic state but we provide a helper which the driver can call. We need to support setting it as dynamic state and not just trust in pipelines because this state also affects attachment clears, which may happen before any pipelines are bound which would update the dynamic state. --- src/vulkan/runtime/vk_graphics_state.c | 41 ++++++++++++++++++++++++++ src/vulkan/runtime/vk_graphics_state.h | 23 +++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/src/vulkan/runtime/vk_graphics_state.c b/src/vulkan/runtime/vk_graphics_state.c index 2c6415ee796..8f60bbc41e8 100644 --- a/src/vulkan/runtime/vk_graphics_state.c +++ b/src/vulkan/runtime/vk_graphics_state.c @@ -1303,6 +1303,17 @@ vk_render_pass_state_init(struct vk_render_pass_state *rp, rp->depth_stencil_attachment_samples = asc_info->depthStencilAttachmentSamples; } + const VkRenderingAttachmentRemapInfoMESA *rar_info = + !driver_rp ? vk_get_pipeline_rendering_ar_info(info) : NULL; + for (uint32_t i = 0; i < MESA_VK_MAX_COLOR_ATTACHMENTS; i++) { + rp->color_attachment_remap[i] = + rar_info == NULL ? i : + rar_info->colorAttachmentRemap[i] == VK_ATTACHMENT_UNUSED ?
+ MESA_VK_ATTACHMENT_UNUSED : rar_info->colorAttachmentRemap[i]; + } + rp->depth_stencil_attachment_enable = + rar_info == NULL || rar_info->depthStencilAttachmentEnable; + for (uint32_t i = 0; i < r_info->colorAttachmentCount; i++) { if (rp->color_attachment_formats[i] != VK_FORMAT_UNDEFINED) rp->attachments |= MESA_VK_RP_ATTACHMENT_COLOR_BIT(i); @@ -1320,6 +1331,14 @@ vk_dynamic_graphics_state_init_rp(struct vk_dynamic_graphics_state *dst, const struct vk_render_pass_state *rp) { dst->rp.attachments = rp->attachments; + + STATIC_ASSERT(sizeof(dst->rp.color_attachment_remap) == + sizeof(rp->color_attachment_remap)); + memcpy(dst->rp.color_attachment_remap, rp->color_attachment_remap, + sizeof(rp->color_attachment_remap)); + + dst->rp.depth_stencil_attachment_enable = + rp->depth_stencil_attachment_enable; } #define FOREACH_STATE_GROUP(f) \ @@ -2264,6 +2283,11 @@ vk_dynamic_graphics_state_copy(struct vk_dynamic_graphics_state *dst, COPY_ARRAY(CB_BLEND_CONSTANTS, cb.blend_constants, 4); COPY_IF_SET(RP_ATTACHMENTS, rp.attachments); + if (IS_SET_IN_SRC(RP_REMAP)) { + COPY_ARRAY(RP_REMAP, rp.color_attachment_remap, + MESA_VK_MAX_COLOR_ATTACHMENTS); + COPY_MEMBER(RP_REMAP, rp.depth_stencil_attachment_enable); + } if (IS_SET_IN_SRC(INPUT_ATTACHMENT_MAP)) { COPY_MEMBER(INPUT_ATTACHMENT_MAP, ial.color_attachment_count); @@ -3191,6 +3215,23 @@ vk_common_CmdSetRenderingAttachmentLocationsKHR( vk_cmd_set_rendering_attachment_locations(cmd, pLocationInfo); } +void +vk_cmd_set_rendering_attachment_remap(struct vk_command_buffer *cmd, + const VkRenderingAttachmentRemapInfoMESA *info) +{ + struct vk_dynamic_graphics_state *dyn = &cmd->dynamic_graphics_state; + + for (uint32_t i = 0; i < MESA_VK_MAX_COLOR_ATTACHMENTS; i++) { + const uint8_t remap = + info == NULL ? i : + info->colorAttachmentRemap[i] == VK_ATTACHMENT_UNUSED ?
+ MESA_VK_ATTACHMENT_UNUSED : info->colorAttachmentRemap[i]; + SET_DYN_VALUE(dyn, RP_REMAP, rp.color_attachment_remap[i], remap); + } + SET_DYN_VALUE(dyn, RP_REMAP, rp.depth_stencil_attachment_enable, + info == NULL || info->depthStencilAttachmentEnable); +} + VKAPI_ATTR void VKAPI_CALL vk_common_CmdSetRenderingInputAttachmentIndicesKHR( VkCommandBuffer commandBuffer, diff --git a/src/vulkan/runtime/vk_graphics_state.h b/src/vulkan/runtime/vk_graphics_state.h index 1d5db70927c..94ec7ec6e14 100644 --- a/src/vulkan/runtime/vk_graphics_state.h +++ b/src/vulkan/runtime/vk_graphics_state.h @@ -27,6 +27,7 @@ #include "vulkan/vulkan_core.h" #include "vk_limits.h" +#include "vk_internal_exts.h" #include "util/bitset.h" #include "util/enum_operators.h" @@ -105,6 +106,7 @@ enum mesa_vk_dynamic_graphics_state { MESA_VK_DYNAMIC_CB_WRITE_MASKS, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS, MESA_VK_DYNAMIC_RP_ATTACHMENTS, + MESA_VK_DYNAMIC_RP_REMAP, MESA_VK_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE, MESA_VK_DYNAMIC_COLOR_ATTACHMENT_MAP, MESA_VK_DYNAMIC_INPUT_ATTACHMENT_MAP, @@ -768,6 +770,10 @@ struct vk_color_attachment_location_state { /***/ struct vk_render_pass_state { /** Set of image aspects bound as color/depth/stencil attachments + * + * If VkRenderingDepthStencilOutputEnableInfoMESA is included in the pNext + * chain of VkPipelineRenderingCreateInfo, the depth and stencil bits will + * only be included in attachments if they are enabled. * * Set to MESA_VK_RP_ATTACHMENT_INFO_INVALID to indicate that attachment * info is invalid. 
@@ -797,6 +803,12 @@ struct vk_render_pass_state { /** VkCustomResolveCreateInfoEXT::customResolve */ bool custom_resolve; + + /** VkRenderingAttachmentRemapInfoMESA::colorAttachmentRemap */ + uint8_t color_attachment_remap[MESA_VK_MAX_COLOR_ATTACHMENTS]; + + /** VkRenderingAttachmentRemapInfoMESA::depthStencilAttachmentEnable */ + bool depth_stencil_attachment_enable; }; static inline bool @@ -943,7 +955,14 @@ struct vk_dynamic_graphics_state { struct vk_color_blend_state cb; struct { + /** MESA_VK_DYNAMIC_RP_ATTACHMENTS */ enum vk_rp_attachment_flags attachments; + + /** MESA_VK_DYNAMIC_RP_REMAP */ + uint8_t color_attachment_remap[MESA_VK_MAX_COLOR_ATTACHMENTS]; + + /** MESA_VK_DYNAMIC_RP_REMAP */ + bool depth_stencil_attachment_enable; } rp; /** MESA_VK_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE */ @@ -1299,6 +1318,10 @@ void vk_cmd_set_rendering_attachment_locations(struct vk_command_buffer *cmd, const VkRenderingAttachmentLocationInfoKHR *info); +void +vk_cmd_set_rendering_attachment_remap(struct vk_command_buffer *cmd, + const VkRenderingAttachmentRemapInfoMESA *info); + const char * vk_dynamic_graphic_state_to_str(enum mesa_vk_dynamic_graphics_state state); From 46b88ac7b62a5c01726a4370a4c5128d79cf4e19 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Tue, 9 Dec 2025 19:15:04 -0500 Subject: [PATCH 16/41] vulkan/render_pass: Drop self-dependency barriers --- src/vulkan/runtime/vk_render_pass.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/vulkan/runtime/vk_render_pass.c b/src/vulkan/runtime/vk_render_pass.c index 0d63e6ac55e..063245ac5ff 100644 --- a/src/vulkan/runtime/vk_render_pass.c +++ b/src/vulkan/runtime/vk_render_pass.c @@ -2805,6 +2805,13 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, if (dep->dst_subpass != subpass_idx) continue; + /* Self-dependencies just exist to inform the driver that the app might + * do a vkCmdPipelineBarrier() mid-subpass. There's nothing for us to + * emit between subpasses. 
+ */ + if (dep->src_subpass == subpass_idx) + continue; + if (dep->flags & VK_DEPENDENCY_VIEW_LOCAL_BIT) { /* From the Vulkan 1.3.204 spec: * From a2a5831aa5414b9d8f6233fb79ef9e00590af1d5 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Tue, 9 Dec 2025 19:15:10 -0500 Subject: [PATCH 17/41] vulkan/render_pass: Break subpass barriers into three helpers --- src/vulkan/runtime/vk_render_pass.c | 681 ++++++++++++++++++---------- 1 file changed, 431 insertions(+), 250 deletions(-) diff --git a/src/vulkan/runtime/vk_render_pass.c b/src/vulkan/runtime/vk_render_pass.c index 063245ac5ff..9670a2ce9d8 100644 --- a/src/vulkan/runtime/vk_render_pass.c +++ b/src/vulkan/runtime/vk_render_pass.c @@ -32,6 +32,7 @@ #include "vk_framebuffer.h" #include "vk_image.h" #include "vk_physical_device.h" +#include "vk_synchronization.h" #include "vk_util.h" #include "vk_android.h" @@ -935,6 +936,67 @@ vk_render_add_subpass(struct vk_render *render, } } +static bool +vk_subpass_dependency_allowed_mid_subpass(const struct vk_subpass_dependency *dep, + uint32_t view_mask) +{ + /* From the Vulkan 1.4.335 spec: + * + * "If srcSubpass is equal to dstSubpass then the VkSubpassDependency + * does not directly define a dependency. Instead, it enables pipeline + * barriers to be used in a render pass instance within the identified + * subpass, where the scopes of one pipeline barrier must be a subset of + * those described by one subpass dependency. Subpass dependencies + * specified in this way that include framebuffer-space stages in the + * srcStageMask must only include framebuffer-space stages in + * dstStageMask, and must include VK_DEPENDENCY_BY_REGION_BIT." 
+ */ + + const VkPipelineStageFlags2 src_stage_mask = + vk_expand_src_stage_flags2(dep->src_stage_mask); + + const VkPipelineStageFlags2 framebuffer_stage_mask = + VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT; + + if (!(src_stage_mask & framebuffer_stage_mask)) + return true; + + const VkPipelineStageFlags2 dst_stage_mask = + vk_expand_dst_stage_flags2(dep->dst_stage_mask); + + if (dst_stage_mask & ~framebuffer_stage_mask) + return false; + + if (!(dep->flags & VK_DEPENDENCY_BY_REGION_BIT)) + return false; + + /* From the Vulkan 1.4.335 spec: + * + * "When a subpass dependency is specified in this way for a subpass + * that has more than one view in its view mask, its dependencyFlags + * must include VK_DEPENDENCY_VIEW_LOCAL_BIT." + */ + if (util_bitcount(view_mask) > 1) { + if (!(dep->flags & VK_DEPENDENCY_VIEW_LOCAL_BIT)) + return false; + + /* This is a bit conservative because Vulkan technically allows you to + * thread subpasses together that have different view masks but where + * views in one subpass map to views in another. We really don't want + * to deal with that shenanigans so we just disallow subpass merging + * whenever viewOffset != 0. This is safe to do (and even assert!) + * because this is required when srcSubpass == dstSubpass. 
+ */ + if (dep->view_offset != 0) + return false; + } + + return true; +} + static uint32_t vk_subpass_get_color_attachment_index(const struct vk_subpass *subpass, uint32_t attachment) @@ -2395,6 +2457,373 @@ transition_attachment(struct vk_command_buffer *cmd_buffer, return view_mask; } +static void +barrier_before_subpass_range(struct vk_command_buffer *cmd_buffer, + uint32_t first_subpass, uint32_t last_subpass) +{ + const struct vk_render_pass *pass = cmd_buffer->render_pass; + struct vk_device_dispatch_table *disp = + &cmd_buffer->base.device->dispatch_table; + + assert(first_subpass <= last_subpass); + + /* At most all dependencies will need a barrier, and we might have an + * implicit one per subpass. + */ + const uint32_t max_mem_barrier_count = + pass->dependency_count + last_subpass - first_subpass + 1; + STACK_ARRAY(VkMemoryBarrier2, mem_barriers, max_mem_barrier_count); + uint32_t mem_barrier_count = 0; + + for (uint32_t d = 0; d < pass->dependency_count; d++) { + const struct vk_subpass_dependency *dep = &pass->dependencies[d]; + if (dep->dst_subpass < first_subpass || last_subpass < dep->dst_subpass) + continue; + + const struct vk_subpass *dst_subpass = + &pass->subpasses[dep->dst_subpass]; + + assert(dep->src_subpass == VK_SUBPASS_EXTERNAL || + dep->src_subpass <= dep->dst_subpass); + + if (dep->src_subpass != VK_SUBPASS_EXTERNAL && + first_subpass <= dep->src_subpass) { + /* These get handled by barrier_mid_subpass_range() */ + assert(dep->view_offset == 0); + continue; + } + + if (dep->flags & VK_DEPENDENCY_VIEW_LOCAL_BIT) { + /* From the Vulkan 1.3.204 spec: + * + * VUID-VkSubpassDependency2-dependencyFlags-03091 + * + * "If dependencyFlags includes VK_DEPENDENCY_VIEW_LOCAL_BIT, + * dstSubpass must not be equal to VK_SUBPASS_EXTERNAL" + */ + assert(dep->src_subpass != VK_SUBPASS_EXTERNAL); + + assert(dep->src_subpass < pass->subpass_count); + const struct vk_subpass *src_subpass = + &pass->subpasses[dep->src_subpass]; + + /* Figure out the set of 
views in the source subpass affected by this + * dependency. + */ + uint32_t src_dep_view_mask = dst_subpass->view_mask; + if (dep->view_offset >= 0) + src_dep_view_mask <<= dep->view_offset; + else + src_dep_view_mask >>= -dep->view_offset; + + /* From the Vulkan 1.3.204 spec: + * + * "If the dependency is view-local, then each view (dstView) in + * the destination subpass depends on the view dstView + + * pViewOffsets[dependency] in the source subpass. If there is not + * such a view in the source subpass, then this dependency does + * not affect that view in the destination subpass." + */ + if (!(src_subpass->view_mask & src_dep_view_mask)) + continue; + } + + assert(mem_barrier_count < max_mem_barrier_count); + mem_barriers[mem_barrier_count++] = (VkMemoryBarrier2){ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2, + .srcStageMask = dep->src_stage_mask, + .srcAccessMask = dep->src_access_mask, + .dstStageMask = dep->dst_stage_mask, + .dstAccessMask = dep->dst_access_mask, + }; + } + + uint32_t max_image_barrier_count = 0; + for (uint32_t s = first_subpass; s <= last_subpass; s++) { + const struct vk_subpass *subpass = &pass->subpasses[s]; + for (uint32_t a = 0; a < subpass->attachment_count; a++) { + const struct vk_subpass_attachment *sp_att = &subpass->attachments[a]; + if (sp_att->attachment == VK_ATTACHMENT_UNUSED) + continue; + + assert(sp_att->attachment < pass->attachment_count); + const struct vk_render_pass_attachment *rp_att = + &pass->attachments[sp_att->attachment]; + + max_image_barrier_count += util_bitcount(subpass->view_mask) * + util_bitcount(rp_att->aspects); + } + if (pass->fragment_density_map.attachment != VK_ATTACHMENT_UNUSED) + max_image_barrier_count += util_bitcount(subpass->view_mask); + } + STACK_ARRAY(VkImageMemoryBarrier2, image_barriers, max_image_barrier_count); + uint32_t image_barrier_count = 0; + + for (uint32_t s = first_subpass; s <= last_subpass; s++) { + const struct vk_subpass *subpass = &pass->subpasses[s]; + + bool 
has_layout_transition = false; + for (uint32_t a = 0; a < subpass->attachment_count; a++) { + const struct vk_subpass_attachment *sp_att = &subpass->attachments[a]; + if (sp_att->attachment == VK_ATTACHMENT_UNUSED) + continue; + + /* If we're using an initial layout, the attachment will already be + * marked as transitioned and this will be a no-op. + */ + uint32_t transitioned_views = + transition_attachment(cmd_buffer, sp_att->attachment, + subpass->view_mask, + sp_att->layout, sp_att->stencil_layout, + &image_barrier_count, + max_image_barrier_count, + image_barriers); + + has_layout_transition |= (sp_att->first_subpass & transitioned_views) != 0; + } + + if (pass->fragment_density_map.attachment != VK_ATTACHMENT_UNUSED) { + transition_attachment(cmd_buffer, + pass->fragment_density_map.attachment, + subpass->view_mask, + pass->fragment_density_map.layout, + VK_IMAGE_LAYOUT_UNDEFINED, + &image_barrier_count, + max_image_barrier_count, + image_barriers); + } + + if (has_layout_transition && !subpass->has_external_src_dependency) { + /* From the Vulkan 1.3.232 spec: + * + * "If there is no subpass dependency from VK_SUBPASS_EXTERNAL to + * the first subpass that uses an attachment, then an implicit + * subpass dependency exists from VK_SUBPASS_EXTERNAL to the first + * subpass it is used in. The implicit subpass dependency only + * exists if there exists an automatic layout transition away from + * initialLayout. 
The subpass dependency operates as if defined + * with the following parameters: + * + * VkSubpassDependency implicitDependency = { + * .srcSubpass = VK_SUBPASS_EXTERNAL; + * .dstSubpass = firstSubpass; // First subpass attachment is used in + * .srcStageMask = VK_PIPELINE_STAGE_NONE; + * .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + * .srcAccessMask = 0; + * .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | + * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + * .dependencyFlags = 0; + * };" + */ + mem_barriers[mem_barrier_count++] = (VkMemoryBarrier2){ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2, + .srcStageMask = VK_PIPELINE_STAGE_2_NONE, + .srcAccessMask = VK_ACCESS_2_NONE, + .dstStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + .dstAccessMask = VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT | + VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, + }; + } + } + assert(image_barrier_count <= max_image_barrier_count); + + if (mem_barrier_count > 0 || image_barrier_count > 0) { + const VkDependencyInfo dependency_info = { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .dependencyFlags = 0, + .memoryBarrierCount = mem_barrier_count, + .pMemoryBarriers = mem_barrier_count > 0 ? mem_barriers : NULL, + .imageMemoryBarrierCount = image_barrier_count, + .pImageMemoryBarriers = image_barrier_count > 0 ? 
image_barriers : NULL, + }; + cmd_buffer->runtime_rp_barrier = true; + disp->CmdPipelineBarrier2(vk_command_buffer_to_handle(cmd_buffer), + &dependency_info); + cmd_buffer->runtime_rp_barrier = false; + } + + STACK_ARRAY_FINISH(image_barriers); + STACK_ARRAY_FINISH(mem_barriers); +} + +static void +barrier_mid_subpass_range(struct vk_command_buffer *cmd_buffer, + uint32_t first_subpass, uint32_t subpass_idx, + uint32_t last_subpass, uint32_t view_mask) +{ + const struct vk_render_pass *pass = cmd_buffer->render_pass; + struct vk_device_dispatch_table *disp = + &cmd_buffer->base.device->dispatch_table; + + assert(first_subpass <= subpass_idx); + assert(subpass_idx <= last_subpass); + + /* At most all dependencies will need a barrier, and we might have an + * implicit one per subpass. + */ + const uint32_t max_mem_barrier_count = + pass->dependency_count + last_subpass - first_subpass + 1; + STACK_ARRAY(VkMemoryBarrier2, mem_barriers, max_mem_barrier_count); + uint32_t mem_barrier_count = 0; + + for (uint32_t d = 0; d < pass->dependency_count; d++) { + const struct vk_subpass_dependency *dep = &pass->dependencies[d]; + if (dep->dst_subpass != subpass_idx) + continue; + + assert(dep->src_subpass == VK_SUBPASS_EXTERNAL || + dep->src_subpass <= dep->dst_subpass); + + /* This case is handled by barrier_before_subpass_range() */ + if (dep->src_subpass == VK_SUBPASS_EXTERNAL || + dep->src_subpass < first_subpass) + continue; + + assert(vk_subpass_dependency_allowed_mid_subpass(dep, view_mask)); + + /* These dependencies aren't real memory barriers. They just exist to + * say that the client is allowed to insert a barrier.
+ */ + if (dep->src_subpass == subpass_idx) + continue; + + assert(mem_barrier_count < max_mem_barrier_count); + mem_barriers[mem_barrier_count++] = (VkMemoryBarrier2){ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2, + .srcStageMask = dep->src_stage_mask, + .srcAccessMask = dep->src_access_mask, + .dstStageMask = dep->dst_stage_mask, + .dstAccessMask = dep->dst_access_mask, + }; + } + + if (mem_barrier_count > 0) { + const VkDependencyInfo dependency_info = { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .dependencyFlags = 0, + .memoryBarrierCount = mem_barrier_count, + .pMemoryBarriers = mem_barrier_count > 0 ? mem_barriers : NULL, + }; + cmd_buffer->runtime_rp_barrier = true; + disp->CmdPipelineBarrier2(vk_command_buffer_to_handle(cmd_buffer), + &dependency_info); + cmd_buffer->runtime_rp_barrier = false; + } + + STACK_ARRAY_FINISH(mem_barriers); +} + +static void +barrier_after_subpass_range(struct vk_command_buffer *cmd_buffer, + uint32_t first_subpass, uint32_t last_subpass) +{ + const struct vk_render_pass *pass = cmd_buffer->render_pass; + struct vk_device_dispatch_table *disp = + &cmd_buffer->base.device->dispatch_table; + + assert(first_subpass <= last_subpass); + + /* At most all dependencies will need a barrier, and we might have an + * implicit one per subpass. 
+ */ + const uint32_t max_mem_barrier_count = + pass->dependency_count + last_subpass - first_subpass + 1; + STACK_ARRAY(VkMemoryBarrier2, mem_barriers, max_mem_barrier_count); + uint32_t mem_barrier_count = 0; + + for (uint32_t d = 0; d < pass->dependency_count; d++) { + const struct vk_subpass_dependency *dep = &pass->dependencies[d]; + if (dep->src_subpass < first_subpass || last_subpass < dep->src_subpass) + continue; + + if (dep->dst_subpass != VK_SUBPASS_EXTERNAL) + continue; + + mem_barriers[mem_barrier_count++] = (VkMemoryBarrier2){ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2, + .srcStageMask = dep->src_stage_mask, + .srcAccessMask = dep->src_access_mask, + .dstStageMask = dep->dst_stage_mask, + .dstAccessMask = dep->dst_access_mask, + }; + } + + for (uint32_t s = first_subpass; s <= last_subpass; s++) { + const struct vk_subpass *subpass = &pass->subpasses[s]; + if (subpass->has_external_dst_dependency) + continue; + + bool has_layout_transition = false; + for (uint32_t a = 0; a < subpass->attachment_count; a++) { + const struct vk_subpass_attachment *sp_att = &subpass->attachments[a]; + if (sp_att->attachment == VK_ATTACHMENT_UNUSED) + continue; + + const struct vk_render_pass_attachment *rp_att = + &pass->attachments[sp_att->attachment]; + uint32_t view_mask = transition_view_mask(cmd_buffer, sp_att->attachment, + subpass->view_mask, + rp_att->final_layout, + rp_att->final_stencil_layout); + + has_layout_transition |= (sp_att->last_subpass & view_mask) != 0; + } + + /* From the Vulkan 1.3.232 spec: + * + * "Similarly, if there is no subpass dependency from the last + * subpass that uses an attachment to VK_SUBPASS_EXTERNAL, then an + * implicit subpass dependency exists from the last subpass it is + * used in to VK_SUBPASS_EXTERNAL. The implicit subpass dependency + * only exists if there exists an automatic layout transition into + * finalLayout. 
The subpass dependency operates as if defined with + * the following parameters: + * + * VkSubpassDependency implicitDependency = { + * .srcSubpass = lastSubpass; // Last subpass attachment is used in + * .dstSubpass = VK_SUBPASS_EXTERNAL; + * .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + * .dstStageMask = VK_PIPELINE_STAGE_NONE; + * .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + * .dstAccessMask = 0; + * .dependencyFlags = 0; + * };" + */ + if (has_layout_transition) { + mem_barriers[mem_barrier_count++] = (VkMemoryBarrier2){ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2, + .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + .srcAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, + .dstStageMask = VK_PIPELINE_STAGE_2_NONE, + .dstAccessMask = VK_ACCESS_2_NONE, + }; + } + } + + if (mem_barrier_count > 0) { + const VkDependencyInfo dependency_info = { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .dependencyFlags = 0, + .memoryBarrierCount = mem_barrier_count, + .pMemoryBarriers = mem_barriers, + }; + cmd_buffer->runtime_rp_barrier = true; + disp->CmdPipelineBarrier2(vk_command_buffer_to_handle(cmd_buffer), + &dependency_info); + cmd_buffer->runtime_rp_barrier = false; + } + + STACK_ARRAY_FINISH(mem_barriers); +} + static void load_attachment(struct vk_command_buffer *cmd_buffer, uint32_t att_idx, uint32_t view_mask, @@ -2795,170 +3224,7 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, * VkMemoryBarriers for subpass dependencies and it may include some * number of VkImageMemoryBarriers for layout transitions. */ - - /* At most all dependencies will need a barrier, and we might have an - * implicit one. 
*/ - STACK_ARRAY(VkMemoryBarrier2, mem_barriers, pass->dependency_count + 1); - uint32_t mem_barrier_count = 0; - for (uint32_t d = 0; d < pass->dependency_count; d++) { - const struct vk_subpass_dependency *dep = &pass->dependencies[d]; - if (dep->dst_subpass != subpass_idx) - continue; - - /* Self-dependencies just exist to inform the driver that the app might - * do a vkCmdPipelineBarrier() mid-subpass. There's nothing for us to - * emit between subpasses. - */ - if (dep->src_subpass == subpass_idx) - continue; - - if (dep->flags & VK_DEPENDENCY_VIEW_LOCAL_BIT) { - /* From the Vulkan 1.3.204 spec: - * - * VUID-VkSubpassDependency2-dependencyFlags-03091 - * - * "If dependencyFlags includes VK_DEPENDENCY_VIEW_LOCAL_BIT, - * dstSubpass must not be equal to VK_SUBPASS_EXTERNAL" - */ - assert(dep->src_subpass != VK_SUBPASS_EXTERNAL); - - assert(dep->src_subpass < pass->subpass_count); - const struct vk_subpass *src_subpass = - &pass->subpasses[dep->src_subpass]; - - /* Figure out the set of views in the source subpass affected by this - * dependency. - */ - uint32_t src_dep_view_mask = subpass->view_mask; - if (dep->view_offset >= 0) - src_dep_view_mask <<= dep->view_offset; - else - src_dep_view_mask >>= -dep->view_offset; - - /* From the Vulkan 1.3.204 spec: - * - * "If the dependency is view-local, then each view (dstView) in - * the destination subpass depends on the view dstView + - * pViewOffsets[dependency] in the source subpass. If there is not - * such a view in the source subpass, then this dependency does - * not affect that view in the destination subpass." 
- */ - if (!(src_subpass->view_mask & src_dep_view_mask)) - continue; - } - - mem_barriers[mem_barrier_count++] = (VkMemoryBarrier2){ - .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2, - .srcStageMask = dep->src_stage_mask, - .srcAccessMask = dep->src_access_mask, - .dstStageMask = dep->dst_stage_mask, - .dstAccessMask = dep->dst_access_mask, - }; - } - - uint32_t max_image_barrier_count = 0; - for (uint32_t a = 0; a < subpass->attachment_count; a++) { - const struct vk_subpass_attachment *sp_att = &subpass->attachments[a]; - if (sp_att->attachment == VK_ATTACHMENT_UNUSED) - continue; - - assert(sp_att->attachment < pass->attachment_count); - const struct vk_render_pass_attachment *rp_att = - &pass->attachments[sp_att->attachment]; - - max_image_barrier_count += util_bitcount(subpass->view_mask) * - util_bitcount(rp_att->aspects); - } - if (pass->fragment_density_map.attachment != VK_ATTACHMENT_UNUSED) - max_image_barrier_count += util_bitcount(subpass->view_mask); - STACK_ARRAY(VkImageMemoryBarrier2, image_barriers, max_image_barrier_count); - uint32_t image_barrier_count = 0; - bool has_layout_transition = false; - - for (uint32_t a = 0; a < subpass->attachment_count; a++) { - const struct vk_subpass_attachment *sp_att = &subpass->attachments[a]; - if (sp_att->attachment == VK_ATTACHMENT_UNUSED) - continue; - - /* If we're using an initial layout, the attachment will already be - * marked as transitioned and this will be a no-op. 
- */ - uint32_t transitioned_views = - transition_attachment(cmd_buffer, sp_att->attachment, - subpass->view_mask, - sp_att->layout, sp_att->stencil_layout, - &image_barrier_count, - max_image_barrier_count, - image_barriers); - - has_layout_transition |= (sp_att->first_subpass & transitioned_views) != 0; - } - if (pass->fragment_density_map.attachment != VK_ATTACHMENT_UNUSED) { - transition_attachment(cmd_buffer, pass->fragment_density_map.attachment, - subpass->view_mask, - pass->fragment_density_map.layout, - VK_IMAGE_LAYOUT_UNDEFINED, - &image_barrier_count, - max_image_barrier_count, - image_barriers); - } - assert(image_barrier_count <= max_image_barrier_count); - - if (has_layout_transition && !subpass->has_external_src_dependency) { - /* From the Vulkan 1.3.232 spec: - * - * "If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the - * first subpass that uses an attachment, then an implicit subpass - * dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it - * is used in. The implicit subpass dependency only exists if there - * exists an automatic layout transition away from initialLayout. 
The - * subpass dependency operates as if defined with the following - * parameters: - * - * VkSubpassDependency implicitDependency = { - * .srcSubpass = VK_SUBPASS_EXTERNAL; - * .dstSubpass = firstSubpass; // First subpass attachment is used in - * .srcStageMask = VK_PIPELINE_STAGE_NONE; - * .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - * .srcAccessMask = 0; - * .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | - * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | - * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | - * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | - * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - * .dependencyFlags = 0; - * };" - */ - mem_barriers[mem_barrier_count++] = (VkMemoryBarrier2){ - .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2, - .srcStageMask = VK_PIPELINE_STAGE_2_NONE, - .srcAccessMask = VK_ACCESS_2_NONE, - .dstStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - .dstAccessMask = VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT | - VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT | - VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | - VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT | - VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, - }; - } - - if (mem_barrier_count > 0 || image_barrier_count > 0) { - const VkDependencyInfo dependency_info = { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .dependencyFlags = 0, - .memoryBarrierCount = mem_barrier_count, - .pMemoryBarriers = mem_barrier_count > 0 ? mem_barriers : NULL, - .imageMemoryBarrierCount = image_barrier_count, - .pImageMemoryBarriers = image_barrier_count > 0 ? 
image_barriers : NULL, - }; - cmd_buffer->runtime_rp_barrier = true; - disp->CmdPipelineBarrier2(vk_command_buffer_to_handle(cmd_buffer), - &dependency_info); - cmd_buffer->runtime_rp_barrier = false; - } - - STACK_ARRAY_FINISH(image_barriers); - STACK_ARRAY_FINISH(mem_barriers); + barrier_before_subpass_range(cmd_buffer, subpass_idx, subpass_idx); /* Next, handle any VK_ATTACHMENT_LOAD_OP_CLEAR that we couldn't handle * directly by emitting a quick vkCmdBegin/EndRendering to do the load. @@ -3112,97 +3378,12 @@ end_subpass(struct vk_command_buffer *cmd_buffer, const struct vk_render_pass *pass = cmd_buffer->render_pass; const uint32_t subpass_idx = cmd_buffer->subpass_idx; assert(subpass_idx < pass->subpass_count); - const struct vk_subpass *subpass = &pass->subpasses[subpass_idx]; struct vk_device_dispatch_table *disp = &cmd_buffer->base.device->dispatch_table; disp->CmdEndRendering(vk_command_buffer_to_handle(cmd_buffer)); - /* At most all dependencies will need a barrier, and we might have an - * implicit one. 
*/ - STACK_ARRAY(VkMemoryBarrier2, mem_barriers, pass->dependency_count + 1); - uint32_t mem_barrier_count = 0; - for (uint32_t d = 0; d < pass->dependency_count; d++) { - const struct vk_subpass_dependency *dep = &pass->dependencies[d]; - if (dep->src_subpass != subpass_idx) - continue; - - if (dep->dst_subpass != VK_SUBPASS_EXTERNAL) - continue; - - mem_barriers[mem_barrier_count++] = (VkMemoryBarrier2){ - .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2, - .srcStageMask = dep->src_stage_mask, - .srcAccessMask = dep->src_access_mask, - .dstStageMask = dep->dst_stage_mask, - .dstAccessMask = dep->dst_access_mask, - }; - } - - if (!subpass->has_external_dst_dependency) { - bool has_layout_transition = false; - for (uint32_t a = 0; a < subpass->attachment_count; a++) { - const struct vk_subpass_attachment *sp_att = &subpass->attachments[a]; - if (sp_att->attachment == VK_ATTACHMENT_UNUSED) - continue; - - const struct vk_render_pass_attachment *rp_att = - &pass->attachments[sp_att->attachment]; - uint32_t view_mask = transition_view_mask(cmd_buffer, sp_att->attachment, - subpass->view_mask, - rp_att->final_layout, - rp_att->final_stencil_layout); - - has_layout_transition |= (sp_att->last_subpass & view_mask) != 0; - } - - /* From the Vulkan 1.3.232 spec: - * - * "Similarly, if there is no subpass dependency from the last - * subpass that uses an attachment to VK_SUBPASS_EXTERNAL, then an - * implicit subpass dependency exists from the last subpass it is - * used in to VK_SUBPASS_EXTERNAL. The implicit subpass dependency - * only exists if there exists an automatic layout transition into - * finalLayout. 
The subpass dependency operates as if defined with - * the following parameters: - * - * VkSubpassDependency implicitDependency = { - * .srcSubpass = lastSubpass; // Last subpass attachment is used in - * .dstSubpass = VK_SUBPASS_EXTERNAL; - * .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - * .dstStageMask = VK_PIPELINE_STAGE_NONE; - * .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | - * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - * .dstAccessMask = 0; - * .dependencyFlags = 0; - * };" - */ - if (has_layout_transition) { - mem_barriers[mem_barrier_count++] = (VkMemoryBarrier2){ - .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2, - .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - .srcAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | - VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, - .dstStageMask = VK_PIPELINE_STAGE_2_NONE, - .dstAccessMask = VK_ACCESS_2_NONE, - }; - } - } - - if (mem_barrier_count > 0) { - const VkDependencyInfo dependency_info = { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .dependencyFlags = 0, - .memoryBarrierCount = mem_barrier_count, - .pMemoryBarriers = mem_barriers, - }; - cmd_buffer->runtime_rp_barrier = true; - disp->CmdPipelineBarrier2(vk_command_buffer_to_handle(cmd_buffer), - &dependency_info); - cmd_buffer->runtime_rp_barrier = false; - } - - STACK_ARRAY_FINISH(mem_barriers); + barrier_after_subpass_range(cmd_buffer, subpass_idx, subpass_idx); } VKAPI_ATTR void VKAPI_CALL From f2a3584b530f6fb279dacd73129173fcde60370a Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Wed, 10 Dec 2025 11:04:33 -0500 Subject: [PATCH 18/41] vulkan/render_pass: Drop the explicit load_attachment() for FSR Above this we call load_attachment() for every attachment in the subpass, regardless of usage so FSR is already covered there. 
--- src/vulkan/runtime/vk_render_pass.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/vulkan/runtime/vk_render_pass.c b/src/vulkan/runtime/vk_render_pass.c index 9670a2ce9d8..6aa8a0be8db 100644 --- a/src/vulkan/runtime/vk_render_pass.c +++ b/src/vulkan/runtime/vk_render_pass.c @@ -3272,12 +3272,6 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, struct vk_attachment_state *att_state = &cmd_buffer->attachments[sp_att->attachment]; - /* Fragment shading rate attachments have no loadOp (it's implicitly - * LOAD_OP_LOAD) so we need to ensure the load op happens. - */ - load_attachment(cmd_buffer, sp_att->attachment, subpass->view_mask, - sp_att->layout, sp_att->stencil_layout); - fsr_attachment = (VkRenderingFragmentShadingRateAttachmentInfoKHR) { .sType = VK_STRUCTURE_TYPE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR, .imageView = vk_image_view_to_handle(att_state->image_view), From 833dd4ff9531cbb7ab28bdf67563aa66d362a8fa Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Wed, 10 Dec 2025 10:56:37 -0500 Subject: [PATCH 19/41] vulkan/render_pass: Add an assert for last_subpass --- src/vulkan/runtime/vk_render_pass.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/vulkan/runtime/vk_render_pass.c b/src/vulkan/runtime/vk_render_pass.c index 6aa8a0be8db..1f48e69e415 100644 --- a/src/vulkan/runtime/vk_render_pass.c +++ b/src/vulkan/runtime/vk_render_pass.c @@ -3146,6 +3146,7 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, * some places where it may have wanted STORE_OP_NONE but that should * be harmless. 
*/ + assert(subpass_idx < pass->subpass_count - 1); depth_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; stencil_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; } From c8891f499bd068c319ebb6d71733c0ad30ef0570 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Tue, 9 Dec 2025 16:15:41 -0500 Subject: [PATCH 20/41] vulkan/render_pass: Use a vk_render to populate VkRenderingInfo --- src/vulkan/runtime/vk_render_pass.c | 242 +++++++++++++++------------- 1 file changed, 127 insertions(+), 115 deletions(-) diff --git a/src/vulkan/runtime/vk_render_pass.c b/src/vulkan/runtime/vk_render_pass.c index 1f48e69e415..fd759bb2a0a 100644 --- a/src/vulkan/runtime/vk_render_pass.c +++ b/src/vulkan/runtime/vk_render_pass.c @@ -1765,13 +1765,25 @@ vk_get_command_buffer_inheritance_as_rendering_resume( if (pass == NULL) return NULL; - assert(inheritance->subpass < pass->subpass_count); - const struct vk_subpass *subpass = &pass->subpasses[inheritance->subpass]; + const uint32_t subpass_idx = inheritance->subpass; + assert(subpass_idx < pass->subpass_count); VK_FROM_HANDLE(vk_framebuffer, fb, inheritance->framebuffer); if (fb == NULL || (fb->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT)) return NULL; + struct vk_render render; + vk_render_init_for_single_subpass(&render, pass, + &pass->subpasses[subpass_idx]); + + uint32_t color_count = 0; + for (uint32_t c = 0; c < MESA_VK_MAX_COLOR_ATTACHMENTS; c++) { + if (render.color_attachments[c].attachment != VK_ATTACHMENT_UNUSED) + color_count = MAX2(color_count, c + 1); + } + const bool has_ds = + render.depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED; + data->rendering = (VkRenderingInfo) { .sType = VK_STRUCTURE_TYPE_RENDERING_INFO, .flags = VK_RENDERING_RESUMING_BIT, @@ -1780,57 +1792,57 @@ vk_get_command_buffer_inheritance_as_rendering_resume( .extent = { fb->width, fb->height }, }, .layerCount = fb->layers, - .viewMask = pass->is_multiview ? subpass->view_mask : 0, + .viewMask = pass->is_multiview ? 
render.view_mask : 0, }; VkRenderingAttachmentInfo *attachments = data->attachments; - VkRenderingAttachmentFlagsInfoKHR *attachments_flags = (VkRenderingAttachmentFlagsInfoKHR *) - (data->attachments + subpass->color_count + - 2 * (subpass->depth_stencil_attachment != NULL)); + VkRenderingAttachmentFlagsInfoKHR *attachments_flags = + (VkRenderingAttachmentFlagsInfoKHR *) + (data->attachments + color_count + 2 * has_ds); - for (unsigned i = 0; i < subpass->color_count; i++) { - const struct vk_subpass_attachment *sp_att = - &subpass->color_attachments[i]; - if (sp_att->attachment == VK_ATTACHMENT_UNUSED) { - attachments[i] = (VkRenderingAttachmentInfo) { + for (unsigned c = 0; c < color_count; c++) { + const struct vk_subpass_attachment *r_att = &render.color_attachments[c]; + if (r_att->attachment == VK_ATTACHMENT_UNUSED) { + attachments[c] = (VkRenderingAttachmentInfo) { .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, .imageView = VK_NULL_HANDLE, }; continue; } - assert(sp_att->attachment < pass->attachment_count); + assert(r_att->attachment < pass->attachment_count); + const struct vk_render_pass_attachment *rp_att = &pass->attachments[r_att->attachment]; - attachments_flags[i] = (VkRenderingAttachmentFlagsInfoKHR) { + attachments_flags[c] = (VkRenderingAttachmentFlagsInfoKHR) { .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_FLAGS_INFO_KHR, .flags = vk_attachment_description_flags_to_rendering_flags( - pass->attachments[sp_att->attachment].flags), + rp_att->flags), }; - attachments[i] = (VkRenderingAttachmentInfo) { + attachments[c] = (VkRenderingAttachmentInfo) { .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, - .pNext = &attachments_flags[i], - .imageView = fb->attachments[sp_att->attachment], - .imageLayout = sp_att->layout, + .pNext = &attachments_flags[c], + .imageView = fb->attachments[r_att->attachment], + .imageLayout = r_att->layout, .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, }; } - data->rendering.colorAttachmentCount = 
subpass->color_count; + data->rendering.colorAttachmentCount = color_count; data->rendering.pColorAttachments = attachments; - attachments += subpass->color_count; + attachments += color_count; - if (subpass->depth_stencil_attachment) { - const struct vk_subpass_attachment *sp_att = - subpass->depth_stencil_attachment; - assert(sp_att->attachment < pass->attachment_count); + if (render.depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) { + const struct vk_subpass_attachment *r_att = + &render.depth_stencil_attachment; + assert(r_att->attachment < pass->attachment_count); - VK_FROM_HANDLE(vk_image_view, iview, fb->attachments[sp_att->attachment]); + VK_FROM_HANDLE(vk_image_view, iview, fb->attachments[r_att->attachment]); if (iview->image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { *attachments = (VkRenderingAttachmentInfo) { .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, .imageView = vk_image_view_to_handle(iview), - .imageLayout = sp_att->layout, + .imageLayout = r_att->layout, .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, }; @@ -1841,7 +1853,7 @@ vk_get_command_buffer_inheritance_as_rendering_resume( *attachments = (VkRenderingAttachmentInfo) { .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, .imageView = vk_image_view_to_handle(iview), - .imageLayout = sp_att->stencil_layout, + .imageLayout = r_att->stencil_layout, .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, }; @@ -1849,26 +1861,26 @@ vk_get_command_buffer_inheritance_as_rendering_resume( } } - if (subpass->fragment_shading_rate_attachment) { - const struct vk_subpass_attachment *sp_att = - subpass->fragment_shading_rate_attachment; - assert(sp_att->attachment < pass->attachment_count); + if (render.fragment_shading_rate_attachment.attachment != VK_ATTACHMENT_UNUSED) { + const struct vk_subpass_attachment *r_att = + &render.fragment_shading_rate_attachment; + assert(r_att->attachment < pass->attachment_count); data->fsr_att = 
(VkRenderingFragmentShadingRateAttachmentInfoKHR) { .sType = VK_STRUCTURE_TYPE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR, - .imageView = fb->attachments[sp_att->attachment], - .imageLayout = sp_att->layout, + .imageView = fb->attachments[r_att->attachment], + .imageLayout = r_att->layout, .shadingRateAttachmentTexelSize = - subpass->fragment_shading_rate_attachment_texel_size, + render.fragment_shading_rate_attachment_texel_size, }; __vk_append_struct(&data->rendering, &data->fsr_att); } - if (subpass->mrtss_samples != 0) { + if (render.mrtss_samples != 0) { data->mrtss = (VkMultisampledRenderToSingleSampledInfoEXT) { .sType = VK_STRUCTURE_TYPE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT, .multisampledRenderToSingleSampledEnable = true, - .rasterizationSamples = subpass->mrtss_samples, + .rasterizationSamples = render.mrtss_samples, }; __vk_append_struct(&data->rendering, (void *)&data->mrtss); } @@ -2913,32 +2925,31 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, const struct vk_framebuffer *framebuffer = cmd_buffer->framebuffer; const uint32_t subpass_idx = cmd_buffer->subpass_idx; assert(subpass_idx < pass->subpass_count); - const struct vk_subpass *subpass = &pass->subpasses[subpass_idx]; struct vk_device_dispatch_table *disp = &cmd_buffer->base.device->dispatch_table; + struct vk_render render; + vk_render_init_for_single_subpass(&render, pass, + &pass->subpasses[subpass_idx]); + /* First, we figure out all our attachments and attempt to handle image * layout transitions and load ops as part of vkCmdBeginRendering if we * can. For any we can't handle this way, we'll need explicit barriers * or quick vkCmdBegin/EndRendering to do the load op. 
*/ - STACK_ARRAY(VkRenderingAttachmentFlagsInfoKHR, color_attachments_flags, - subpass->color_count); - STACK_ARRAY(VkRenderingAttachmentInfo, color_attachments, - subpass->color_count); - STACK_ARRAY(VkRenderingAttachmentInitialLayoutInfoMESA, - color_attachment_initial_layouts, - subpass->color_count); + VkRenderingAttachmentInfo color_attachments[MESA_VK_MAX_COLOR_ATTACHMENTS]; + VkRenderingAttachmentFlagsInfoKHR color_attachments_flags[MESA_VK_MAX_COLOR_ATTACHMENTS]; + VkRenderingAttachmentInitialLayoutInfoMESA color_attachment_initial_layouts[MESA_VK_MAX_COLOR_ATTACHMENTS]; - for (uint32_t i = 0; i < subpass->color_count; i++) { - const struct vk_subpass_attachment *sp_att = - &subpass->color_attachments[i]; + uint32_t color_count = 0; + for (uint32_t c = 0; c < MESA_VK_MAX_COLOR_ATTACHMENTS; c++) { + const struct vk_subpass_attachment *r_att = &render.color_attachments[c]; + VkRenderingAttachmentInfo *color_attachment = &color_attachments[c]; VkRenderingAttachmentFlagsInfoKHR *color_attachment_flags = - &color_attachments_flags[i]; - VkRenderingAttachmentInfo *color_attachment = &color_attachments[i]; + &color_attachments_flags[c]; - if (sp_att->attachment == VK_ATTACHMENT_UNUSED) { + if (r_att->attachment == VK_ATTACHMENT_UNUSED) { *color_attachment = (VkRenderingAttachmentInfo) { .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, .imageView = VK_NULL_HANDLE, @@ -2946,11 +2957,13 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, continue; } - assert(sp_att->attachment < pass->attachment_count); + color_count = MAX2(color_count, c + 1); + + assert(r_att->attachment < pass->attachment_count); const struct vk_render_pass_attachment *rp_att = - &pass->attachments[sp_att->attachment]; + &pass->attachments[r_att->attachment]; struct vk_attachment_state *att_state = - &cmd_buffer->attachments[sp_att->attachment]; + &cmd_buffer->attachments[r_att->attachment]; *color_attachment_flags = (VkRenderingAttachmentFlagsInfoKHR) { .sType = 
VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_FLAGS_INFO_KHR, @@ -2960,25 +2973,25 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, .pNext = color_attachment_flags, .imageView = vk_image_view_to_handle(att_state->image_view), - .imageLayout = sp_att->layout, + .imageLayout = r_att->layout, }; - if (!(subpass->view_mask & att_state->views_loaded)) { + if (!(render.view_mask & att_state->views_loaded)) { /* None of these views have been used before */ color_attachment->loadOp = rp_att->load_op; color_attachment->clearValue = att_state->clear_value; - att_state->views_loaded |= subpass->view_mask; + att_state->views_loaded |= render.view_mask; VkImageLayout initial_layout; if (can_use_attachment_initial_layout(cmd_buffer, - sp_att->attachment, - subpass->view_mask, + r_att->attachment, + render.view_mask, &initial_layout, NULL) && - sp_att->layout != initial_layout) { + r_att->layout != initial_layout) { assert(color_attachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR); VkRenderingAttachmentInitialLayoutInfoMESA *color_initial_layout = - &color_attachment_initial_layouts[i]; + &color_attachment_initial_layouts[c]; *color_initial_layout = (VkRenderingAttachmentInitialLayoutInfoMESA) { .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INITIAL_LAYOUT_INFO_MESA, .initialLayout = initial_layout, @@ -2986,8 +2999,8 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, __vk_append_struct(color_attachment, color_initial_layout); vk_command_buffer_set_attachment_layout(cmd_buffer, - sp_att->attachment, - sp_att->layout, + r_att->attachment, + r_att->layout, VK_IMAGE_LAYOUT_UNDEFINED); } } else { @@ -2997,7 +3010,7 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, color_attachment->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; } - if (!(subpass->view_mask & ~sp_att->last_subpass)) { + if (!(render.view_mask & ~r_att->last_subpass)) { /* This is the last subpass for every view */ color_attachment->storeOp = rp_att->store_op; } else { 
@@ -3014,25 +3027,25 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, /* With multisample render to single sample, we may have a resolve even * if we don't have a resolve attachment. */ - color_attachment->resolveMode = sp_att->resolve_mode; + color_attachment->resolveMode = r_att->resolve_mode; - if (sp_att->resolve != NULL) { - assert(sp_att->resolve->attachment < pass->attachment_count); + if (r_att->resolve != NULL) { + assert(r_att->resolve->attachment < pass->attachment_count); struct vk_attachment_state *res_att_state = - &cmd_buffer->attachments[sp_att->resolve->attachment]; + &cmd_buffer->attachments[r_att->resolve->attachment]; /* Resolve attachments are entirely overwritten by the resolve * operation so the load op really doesn't matter. We can consider * the resolve as being the load. */ - res_att_state->views_loaded |= subpass->view_mask; + res_att_state->views_loaded |= render.view_mask; const struct vk_render_pass_attachment *resolve_att = - &pass->attachments[sp_att->resolve->attachment]; + &pass->attachments[r_att->resolve->attachment]; color_attachment->resolveImageView = vk_image_view_to_handle(res_att_state->image_view); - color_attachment->resolveImageLayout = sp_att->resolve->layout; + color_attachment->resolveImageLayout = r_att->resolve->layout; color_attachment_flags->flags = vk_attachment_description_flags_to_rendering_flags(resolve_att->flags); @@ -3059,21 +3072,21 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, }; const VkSampleLocationsInfoEXT *sample_locations = NULL; - if (subpass->depth_stencil_attachment != NULL) { - const struct vk_subpass_attachment *sp_att = - subpass->depth_stencil_attachment; + if (render.depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) { + const struct vk_subpass_attachment *r_att = + &render.depth_stencil_attachment; - assert(sp_att->attachment < pass->attachment_count); + assert(r_att->attachment < pass->attachment_count); const struct vk_render_pass_attachment *rp_att = - 
&pass->attachments[sp_att->attachment]; + &pass->attachments[r_att->attachment]; struct vk_attachment_state *att_state = - &cmd_buffer->attachments[sp_att->attachment]; + &cmd_buffer->attachments[r_att->attachment]; - assert(sp_att->aspects == rp_att->aspects); + assert(r_att->aspects == rp_att->aspects); if (rp_att->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { depth_attachment.imageView = vk_image_view_to_handle(att_state->image_view); - depth_attachment.imageLayout = sp_att->layout; + depth_attachment.imageLayout = r_att->layout; depth_attachment_flags.flags = vk_attachment_description_flags_to_rendering_flags(rp_att->flags); } @@ -3081,7 +3094,7 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, if (rp_att->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { stencil_attachment.imageView = vk_image_view_to_handle(att_state->image_view); - stencil_attachment.imageLayout = sp_att->stencil_layout; + stencil_attachment.imageLayout = r_att->stencil_layout; stencil_attachment_flags.flags = vk_attachment_description_flags_to_rendering_flags(rp_att->flags); } @@ -3091,22 +3104,22 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, __vk_append_struct(&stencil_attachment, &stencil_attachment_flags); - if (!(subpass->view_mask & att_state->views_loaded)) { + if (!(render.view_mask & att_state->views_loaded)) { /* None of these views have been used before */ depth_attachment.loadOp = rp_att->load_op; depth_attachment.clearValue = att_state->clear_value; stencil_attachment.loadOp = rp_att->stencil_load_op; stencil_attachment.clearValue = att_state->clear_value; - att_state->views_loaded |= subpass->view_mask; + att_state->views_loaded |= render.view_mask; VkImageLayout initial_layout, initial_stencil_layout; if (can_use_attachment_initial_layout(cmd_buffer, - sp_att->attachment, - subpass->view_mask, + r_att->attachment, + render.view_mask, &initial_layout, &initial_stencil_layout)) { if ((rp_att->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && - sp_att->layout != initial_layout) { + 
r_att->layout != initial_layout) { assert(depth_attachment.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR); depth_initial_layout.initialLayout = initial_layout; __vk_append_struct(&depth_attachment, @@ -3114,7 +3127,7 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, } if ((rp_att->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && - sp_att->stencil_layout != initial_stencil_layout) { + r_att->stencil_layout != initial_stencil_layout) { assert(stencil_attachment.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR); stencil_initial_layout.initialLayout = initial_stencil_layout; __vk_append_struct(&stencil_attachment, @@ -3122,9 +3135,9 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, } vk_command_buffer_set_attachment_layout(cmd_buffer, - sp_att->attachment, - sp_att->layout, - sp_att->stencil_layout); + r_att->attachment, + r_att->layout, + r_att->stencil_layout); } } else { /* We've seen at least one of the views of this attachment before so @@ -3134,7 +3147,7 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, stencil_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; } - if (!(subpass->view_mask & ~sp_att->last_subpass)) { + if (!(render.view_mask & ~r_att->last_subpass)) { /* This is the last subpass for every view */ depth_attachment.storeOp = rp_att->store_op; stencil_attachment.storeOp = rp_att->stencil_store_op; @@ -3178,20 +3191,20 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, get_subpass_sample_locations(cmd_buffer->pass_sample_locations, subpass_idx); - u_foreach_bit(view, subpass->view_mask) + u_foreach_bit(view, render.view_mask) att_state->views[view].sample_locations = sample_locations; } /* With multisample render to single sample, we may have a resolve even * if we don't have a resolve attachment. 
*/ - depth_attachment.resolveMode = sp_att->resolve_mode; - stencil_attachment.resolveMode = sp_att->stencil_resolve_mode; + depth_attachment.resolveMode = r_att->resolve_mode; + stencil_attachment.resolveMode = r_att->stencil_resolve_mode; - if (sp_att->resolve != NULL) { - assert(sp_att->resolve->attachment < pass->attachment_count); + if (r_att->resolve != NULL) { + assert(r_att->resolve->attachment < pass->attachment_count); struct vk_attachment_state *res_att_state = - &cmd_buffer->attachments[sp_att->resolve->attachment]; + &cmd_buffer->attachments[r_att->resolve->attachment]; VkImageAspectFlags resolved_aspects = 0; @@ -3199,7 +3212,7 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, depth_attachment.resolveImageView = vk_image_view_to_handle(res_att_state->image_view); depth_attachment.resolveImageLayout = - sp_att->resolve->layout; + r_att->resolve->layout; resolved_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; } @@ -3207,7 +3220,7 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, stencil_attachment.resolveImageView = vk_image_view_to_handle(res_att_state->image_view); stencil_attachment.resolveImageLayout = - sp_att->resolve->stencil_layout; + r_att->resolve->stencil_layout; resolved_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; } @@ -3216,7 +3229,7 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, * resolve operation so the load op really doesn't matter. * We can consider the resolve as being the load. */ - res_att_state->views_loaded |= subpass->view_mask; + res_att_state->views_loaded |= render.view_mask; } } } @@ -3230,6 +3243,7 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, /* Next, handle any VK_ATTACHMENT_LOAD_OP_CLEAR that we couldn't handle * directly by emitting a quick vkCmdBegin/EndRendering to do the load. 
*/ + struct vk_subpass *subpass = &pass->subpasses[subpass_idx]; for (uint32_t a = 0; a < subpass->attachment_count; a++) { const struct vk_subpass_attachment *sp_att = &subpass->attachments[a]; if (sp_att->attachment == VK_ATTACHMENT_UNUSED) @@ -3252,33 +3266,33 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, .flags = VK_RENDERING_LOCAL_READ_CONCURRENT_ACCESS_CONTROL_BIT_KHR, .renderArea = cmd_buffer->render_area, .layerCount = pass->is_multiview ? 1 : framebuffer->layers, - .viewMask = pass->is_multiview ? subpass->view_mask : 0, - .colorAttachmentCount = subpass->color_count, - .pColorAttachments = subpass->color_count > 0 ? color_attachments : NULL, + .viewMask = pass->is_multiview ? render.view_mask : 0, + .colorAttachmentCount = color_count, + .pColorAttachments = color_count > 0 ? color_attachments : NULL, .pDepthAttachment = depth_attachment.imageView != VK_NULL_HANDLE ? &depth_attachment : NULL, .pStencilAttachment = stencil_attachment.imageView != VK_NULL_HANDLE ? &stencil_attachment : NULL, }; - if (subpass->legacy_dithering_enabled) + if (render.legacy_dithering_enabled) rendering.flags |= VK_RENDERING_ENABLE_LEGACY_DITHERING_BIT_EXT; VkRenderingFragmentShadingRateAttachmentInfoKHR fsr_attachment; - if (subpass->fragment_shading_rate_attachment) { - const struct vk_subpass_attachment *sp_att = - subpass->fragment_shading_rate_attachment; + if (render.fragment_shading_rate_attachment.attachment != VK_ATTACHMENT_UNUSED) { + const struct vk_subpass_attachment *r_att = + &render.fragment_shading_rate_attachment; - assert(sp_att->attachment < pass->attachment_count); + assert(r_att->attachment < pass->attachment_count); struct vk_attachment_state *att_state = - &cmd_buffer->attachments[sp_att->attachment]; + &cmd_buffer->attachments[r_att->attachment]; fsr_attachment = (VkRenderingFragmentShadingRateAttachmentInfoKHR) { .sType = VK_STRUCTURE_TYPE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR, .imageView = 
vk_image_view_to_handle(att_state->image_view), - .imageLayout = sp_att->layout, + .imageLayout = r_att->layout, .shadingRateAttachmentTexelSize = - subpass->fragment_shading_rate_attachment_texel_size, + render.fragment_shading_rate_attachment_texel_size, }; __vk_append_struct(&rendering, &fsr_attachment); } @@ -3319,11 +3333,11 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, * to be changed by appending other structures later. */ VkMultisampledRenderToSingleSampledInfoEXT mrtss; - if (subpass->mrtss_samples != 0) { + if (render.mrtss_samples != 0) { mrtss = (VkMultisampledRenderToSingleSampledInfoEXT) { .sType = VK_STRUCTURE_TYPE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT, .multisampledRenderToSingleSampledEnable = true, - .rasterizationSamples = subpass->mrtss_samples, + .rasterizationSamples = render.mrtss_samples, }; __vk_append_struct(&rendering, (void *)&mrtss); } @@ -3356,14 +3370,12 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, * So I'm not sure this CmdSetRenderingInputAttachmentIndices() is * really needed, but let's keep it to play by the rules. */ - VkRenderingInputAttachmentIndexInfo ial_tmp = subpass->ial.info; + VkRenderingInputAttachmentIndexInfo ial_tmp = + pass->subpasses[subpass_idx].ial.info; ial_tmp.pNext = NULL; disp->CmdSetRenderingInputAttachmentIndices(vk_command_buffer_to_handle(cmd_buffer), &ial_tmp); } - - STACK_ARRAY_FINISH(color_attachments); - STACK_ARRAY_FINISH(color_attachment_initial_layouts); } static void From fbc43b7636cbfac7662a27f4ee24e5d0ac46e8e2 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 8 Dec 2025 11:11:30 -0500 Subject: [PATCH 21/41] vulkan/render_pass: Add a vk_subpass_merge enum This denotes merged subpasses by marking the beginning and end of sections. The new enum is intentionally a bitfield so it's easy to check if a subpass begins or ends a group.
--- src/vulkan/runtime/vk_render_pass.c | 25 +++++++++++++++++++++++++ src/vulkan/runtime/vk_render_pass.h | 18 ++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/src/vulkan/runtime/vk_render_pass.c b/src/vulkan/runtime/vk_render_pass.c index fd759bb2a0a..7dfa4eb41dd 100644 --- a/src/vulkan/runtime/vk_render_pass.c +++ b/src/vulkan/runtime/vk_render_pass.c @@ -467,6 +467,30 @@ resolve_mode_for_format(VkFormat format) return VK_RESOLVE_MODE_AVERAGE_BIT; } +static uint32_t +vk_render_pass_find_merge_begin(const struct vk_render_pass *pass, + uint32_t subpass_idx) +{ + while (!(pass->subpasses[subpass_idx].merge & MESA_VK_SUBPASS_MERGE_BEGIN)) { + /* The first subpass is always a begin */ + assert(subpass_idx > 0); + subpass_idx--; + } + return subpass_idx; +} + +static uint32_t +vk_render_pass_find_merge_end(const struct vk_render_pass *pass, + uint32_t subpass_idx) +{ + while (!(pass->subpasses[subpass_idx].merge & MESA_VK_SUBPASS_MERGE_END)) { + /* The last subpass is always an end */ + assert(subpass_idx < pass->subpass_count - 1); + subpass_idx++; + } + return subpass_idx; +} + struct vk_render { uint32_t view_mask; VkSampleCountFlagBits mrtss_samples; @@ -1208,6 +1232,7 @@ vk_common_CreateRenderPass2(VkDevice _device, const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[s]; struct vk_subpass *subpass = &pass->subpasses[s]; + subpass->merge = MESA_VK_SUBPASS_MERGE_SINGLE; subpass->attachment_count = num_subpass_attachments2(desc); subpass->attachments = next_subpass_attachment; diff --git a/src/vulkan/runtime/vk_render_pass.h b/src/vulkan/runtime/vk_render_pass.h index 1a912b3dd85..45eda648efe 100644 --- a/src/vulkan/runtime/vk_render_pass.h +++ b/src/vulkan/runtime/vk_render_pass.h @@ -80,8 +80,26 @@ struct vk_subpass_attachment { VkResolveModeFlagBits stencil_resolve_mode; }; +/***/ +enum vk_subpass_merge { + /* This subpass is in the middle of a merged subpass group */ + MESA_VK_SUBPASS_MERGE_MID = 0, + + /* This subpass begins a merged 
subpass group */ + MESA_VK_SUBPASS_MERGE_BEGIN = 1, + + /* This subpass ends a merged subpass group */ + MESA_VK_SUBPASS_MERGE_END = 2, + + /** This subpass is in its own subpass group */ + MESA_VK_SUBPASS_MERGE_SINGLE = MESA_VK_SUBPASS_MERGE_BEGIN | + MESA_VK_SUBPASS_MERGE_END, +}; + /***/ struct vk_subpass { + enum vk_subpass_merge merge; + /** Count of all attachments referenced by this subpass */ uint32_t attachment_count; From a3cc30b3243f48d045f7a23d586d5c0a1c016516 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Wed, 10 Dec 2025 09:40:58 -0500 Subject: [PATCH 22/41] vulkan/render_pass: Handle merged subpasses in begin/end_subpass() --- src/vulkan/runtime/vk_render_pass.c | 136 +++++++++++++++++++--------- src/vulkan/runtime/vk_render_pass.h | 1 + 2 files changed, 94 insertions(+), 43 deletions(-) diff --git a/src/vulkan/runtime/vk_render_pass.c b/src/vulkan/runtime/vk_render_pass.c index 7dfa4eb41dd..576a54e8a40 100644 --- a/src/vulkan/runtime/vk_render_pass.c +++ b/src/vulkan/runtime/vk_render_pass.c @@ -1797,9 +1797,24 @@ vk_get_command_buffer_inheritance_as_rendering_resume( if (fb == NULL || (fb->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT)) return NULL; + const uint32_t merge_begin = + vk_render_pass_find_merge_begin(pass, subpass_idx); + const uint32_t merge_end = + vk_render_pass_find_merge_end(pass, subpass_idx); + struct vk_render render; - vk_render_init_for_single_subpass(&render, pass, - &pass->subpasses[subpass_idx]); + if (pass->subpasses[subpass_idx].merge == MESA_VK_SUBPASS_MERGE_SINGLE) { + /* For singular subpasses, we use init_for_single_subpass, which doesn't + * re-arrange anything so we avoid requiring DRLR and + * CmdSetRenderingAttachmentLocationInfo(). 
+ */ + vk_render_init_for_single_subpass(&render, pass, + &pass->subpasses[subpass_idx]); + } else { + vk_render_init_empty(&render); + for (uint32_t s = merge_begin; s <= merge_end; s++) + vk_render_add_subpass(&render, pass, &pass->subpasses[s]); + } uint32_t color_count = 0; for (uint32_t c = 0; c < MESA_VK_MAX_COLOR_ATTACHMENTS; c++) { @@ -1910,6 +1925,13 @@ vk_get_command_buffer_inheritance_as_rendering_resume( __vk_append_struct(&data->rendering, (void *)&data->mrtss); } + const struct vk_subpass *subpass = &pass->subpasses[subpass_idx]; + if (subpass->merge != MESA_VK_SUBPASS_MERGE_SINGLE) { + data->rar = subpass->rar_info; + data->rar.pNext = NULL; + __vk_append_struct(&data->rendering, (void *)&data->rar); + } + return &data->rendering; } @@ -2946,6 +2968,7 @@ static void begin_subpass(struct vk_command_buffer *cmd_buffer, const VkSubpassBeginInfo *begin_info) { + VkCommandBuffer cmd_buffer_h = vk_command_buffer_to_handle(cmd_buffer); const struct vk_render_pass *pass = cmd_buffer->render_pass; const struct vk_framebuffer *framebuffer = cmd_buffer->framebuffer; const uint32_t subpass_idx = cmd_buffer->subpass_idx; @@ -2953,9 +2976,44 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, struct vk_device_dispatch_table *disp = &cmd_buffer->base.device->dispatch_table; + const uint32_t merge_begin = + vk_render_pass_find_merge_begin(pass, subpass_idx); + const uint32_t merge_end = + vk_render_pass_find_merge_end(pass, subpass_idx); + + if (!(pass->subpasses[subpass_idx].merge & MESA_VK_SUBPASS_MERGE_BEGIN)) { + const struct vk_subpass *subpass = &pass->subpasses[subpass_idx]; + + /* If this isn't a begin, just do mid-subpass barriers */ + barrier_mid_subpass_range(cmd_buffer, merge_begin, subpass_idx, + merge_end, subpass->view_mask); + + /* And update input indices and color locations */ + VkRenderingInputAttachmentIndexInfo ial_tmp = subpass->ial.info; + ial_tmp.pNext = NULL; + disp->CmdSetRenderingInputAttachmentIndices(cmd_buffer_h, &ial_tmp); + + 
VkRenderingAttachmentRemapInfoMESA rar_tmp = subpass->rar_info; + rar_tmp.pNext = NULL; + vk_cmd_set_rendering_attachment_remap(cmd_buffer, &rar_tmp); + + /* And that's it! */ + return; + } + struct vk_render render; - vk_render_init_for_single_subpass(&render, pass, - &pass->subpasses[subpass_idx]); + if (pass->subpasses[subpass_idx].merge == MESA_VK_SUBPASS_MERGE_SINGLE) { + /* For singular subpasses, we use init_for_single_subpass, which doesn't + * re-arrange anything so we avoid requiring DRLR and + * CmdSetRenderingAttachmentLocationInfo(). + */ + vk_render_init_for_single_subpass(&render, pass, + &pass->subpasses[subpass_idx]); + } else { + vk_render_init_empty(&render); + for (uint32_t s = merge_begin; s <= merge_end; s++) + vk_render_add_subpass(&render, pass, &pass->subpasses[s]); + } /* First, we figure out all our attachments and attempt to handle image * layout transitions and load ops as part of vkCmdBeginRendering if we @@ -3263,19 +3321,21 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, * VkMemoryBarriers for subpass dependencies and it may include some * number of VkImageMemoryBarriers for layout transitions. */ - barrier_before_subpass_range(cmd_buffer, subpass_idx, subpass_idx); + barrier_before_subpass_range(cmd_buffer, merge_begin, merge_end); /* Next, handle any VK_ATTACHMENT_LOAD_OP_CLEAR that we couldn't handle * directly by emitting a quick vkCmdBegin/EndRendering to do the load. 
*/ - struct vk_subpass *subpass = &pass->subpasses[subpass_idx]; - for (uint32_t a = 0; a < subpass->attachment_count; a++) { - const struct vk_subpass_attachment *sp_att = &subpass->attachments[a]; - if (sp_att->attachment == VK_ATTACHMENT_UNUSED) - continue; + for (uint32_t s = merge_begin; s <= merge_end; s++) { + struct vk_subpass *subpass = &pass->subpasses[s]; + for (uint32_t a = 0; a < subpass->attachment_count; a++) { + const struct vk_subpass_attachment *sp_att = &subpass->attachments[a]; + if (sp_att->attachment == VK_ATTACHMENT_UNUSED) + continue; - load_attachment(cmd_buffer, sp_att->attachment, subpass->view_mask, - sp_att->layout, sp_att->stencil_layout); + load_attachment(cmd_buffer, sp_att->attachment, subpass->view_mask, + sp_att->layout, sp_att->stencil_layout); + } } /* TODO: Handle preserve attachments @@ -3367,39 +3427,21 @@ begin_subpass(struct vk_command_buffer *cmd_buffer, __vk_append_struct(&rendering, (void *)&mrtss); } - disp->CmdBeginRendering(vk_command_buffer_to_handle(cmd_buffer), - &rendering); + const struct vk_subpass *subpass = &pass->subpasses[subpass_idx]; + + VkRenderingAttachmentRemapInfoMESA rar_tmp; + if (subpass->merge != MESA_VK_SUBPASS_MERGE_SINGLE) { + rar_tmp = subpass->rar_info; + rar_tmp.pNext = NULL; + __vk_append_struct(&rendering, (void *)&rar_tmp); + } + + disp->CmdBeginRendering(cmd_buffer_h, &rendering); if (disp->CmdSetRenderingInputAttachmentIndices) { - /* From the Vulkan 1.4.312 spec: - * " - * Until this command is called, mappings in the command buffer state - * are treated as each color attachment specified in vkCmdBeginRendering - * mapping to subpass inputs with a InputAttachmentIndex equal to its - * index in VkRenderingInfo::pColorAttachments, and depth/stencil - * attachments mapping to input attachments without these decorations. - * This state is reset whenever vkCmdBeginRendering is called. 
- * " - * - * In practice, CmdBindPipeline() should apply exactly the same - * state to the vk_command_buffer dynamic state, and that's exactly - * what the Vulkan spec wants: - * - * " - * This command sets the input attachment index mappings for subsequent - * drawing commands, and must match the mappings provided to the bound - * pipeline, if one is bound, which can be set by chaining - * VkRenderingInputAttachmentIndexInfo to VkGraphicsPipelineCreateInfo. - * " - * - * So I'm not sure this CmdSetRenderingInputAttachmentIndices() is - * really needed, but let's keep it to play by the rules. - */ - VkRenderingInputAttachmentIndexInfo ial_tmp = - pass->subpasses[subpass_idx].ial.info; + VkRenderingInputAttachmentIndexInfo ial_tmp = subpass->ial.info; ial_tmp.pNext = NULL; - disp->CmdSetRenderingInputAttachmentIndices(vk_command_buffer_to_handle(cmd_buffer), - &ial_tmp); + disp->CmdSetRenderingInputAttachmentIndices(cmd_buffer_h, &ial_tmp); } } @@ -3413,9 +3455,17 @@ end_subpass(struct vk_command_buffer *cmd_buffer, struct vk_device_dispatch_table *disp = &cmd_buffer->base.device->dispatch_table; + if (!(pass->subpasses[subpass_idx].merge & MESA_VK_SUBPASS_MERGE_END)) + return; + disp->CmdEndRendering(vk_command_buffer_to_handle(cmd_buffer)); - barrier_after_subpass_range(cmd_buffer, subpass_idx, subpass_idx); + const uint32_t merge_begin = + vk_render_pass_find_merge_begin(pass, subpass_idx); + const uint32_t merge_end = + vk_render_pass_find_merge_end(pass, subpass_idx); + + barrier_after_subpass_range(cmd_buffer, merge_begin, merge_end); } VKAPI_ATTR void VKAPI_CALL diff --git a/src/vulkan/runtime/vk_render_pass.h b/src/vulkan/runtime/vk_render_pass.h index 45eda648efe..c42970a6a67 100644 --- a/src/vulkan/runtime/vk_render_pass.h +++ b/src/vulkan/runtime/vk_render_pass.h @@ -431,6 +431,7 @@ struct vk_gcbiarr_data { VkRenderingInfo rendering; VkRenderingFragmentShadingRateAttachmentInfoKHR fsr_att; VkMultisampledRenderToSingleSampledInfoEXT mrtss; + 
VkRenderingAttachmentRemapInfoMESA rar; VkRenderingAttachmentInfo attachments[]; }; From 31566ce12658be4bbbd069e261f8b31bd9df3c18 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Tue, 9 Dec 2025 23:52:09 -0500 Subject: [PATCH 23/41] vulkan/render_pass: Add subpass merging support The idea here is that we leave it up to the driver what it wants to merge but the render pass code does all the hard work to figure out what can actually be merged based on view masks, render targets, dependencies, and the like. The interface is simple: try_merge_subpasses(), which takes a subpass index and attempts to merge it into the previous one. If the merge succeeds, those two subpasses are now merged and the function returns true. --- src/vulkan/runtime/vk_render_pass.c | 88 ++++++++++++++++++++++++++--- src/vulkan/runtime/vk_render_pass.h | 13 +++++ 2 files changed, 92 insertions(+), 9 deletions(-) diff --git a/src/vulkan/runtime/vk_render_pass.c b/src/vulkan/runtime/vk_render_pass.c index 576a54e8a40..259e39b9387 100644 --- a/src/vulkan/runtime/vk_render_pass.c +++ b/src/vulkan/runtime/vk_render_pass.c @@ -1184,14 +1184,69 @@ vk_subpass_init_pipeline_infos(struct vk_render_pass *pass, }; } -VKAPI_ATTR VkResult VKAPI_CALL -vk_common_CreateRenderPass2(VkDevice _device, - const VkRenderPassCreateInfo2 *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkRenderPass *pRenderPass) +bool +vk_render_pass_try_merge_subpass(struct vk_render_pass *pass, + uint32_t subpass_idx) { - VK_FROM_HANDLE(vk_device, device, _device); + assert(subpass_idx > 0); + assert(pass->subpasses[subpass_idx].merge == MESA_VK_SUBPASS_MERGE_SINGLE); + const uint32_t merge_begin = + vk_render_pass_find_merge_begin(pass, subpass_idx - 1); + + for (uint32_t d = 0; d < pass->dependency_count; d++) { + const struct vk_subpass_dependency *dep = &pass->dependencies[d]; + if (dep->src_subpass == VK_SUBPASS_EXTERNAL || + dep->dst_subpass == VK_SUBPASS_EXTERNAL) + continue; + + assert(dep->src_subpass <= 
dep->dst_subpass); + + if (dep->src_subpass < merge_begin || subpass_idx < dep->dst_subpass) + continue; + + if (dep->src_subpass == dep->dst_subpass) + continue; + + /* We won't merge them unless the view masks match anyway, so it doesn't + * matter which one we choose. + */ + const uint32_t view_mask = + pass->subpasses[dep->dst_subpass].view_mask; + if (!vk_subpass_dependency_allowed_mid_subpass(dep, view_mask)) + return false; + } + + struct vk_render render; + vk_render_init_empty(&render); + for (uint32_t s = merge_begin; s < subpass_idx; s++) + vk_render_add_subpass(&render, pass, &pass->subpasses[s]); + + if (!vk_render_can_add_subpass(&render, pass, + &pass->subpasses[subpass_idx])) + return false; + + vk_render_add_subpass(&render, pass, &pass->subpasses[subpass_idx]); + + /* Adjust the merge flags */ + assert(pass->subpasses[subpass_idx - 1].merge & MESA_VK_SUBPASS_MERGE_END); + assert(pass->subpasses[subpass_idx].merge == MESA_VK_SUBPASS_MERGE_SINGLE); + + pass->subpasses[subpass_idx - 1].merge &= ~MESA_VK_SUBPASS_MERGE_END; + pass->subpasses[subpass_idx].merge = MESA_VK_SUBPASS_MERGE_END; + + /* Regenerate pipeline info */ + for (uint32_t s = merge_begin; s <= subpass_idx; s++) + vk_subpass_init_pipeline_infos(pass, s, &render); + + return true; +} + +struct vk_render_pass * +vk_render_pass_create(struct vk_device *device, + const VkRenderPassCreateInfo2 *pCreateInfo, + const VkAllocationCallbacks *alloc) +{ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2); VK_MULTIALLOC(ma); @@ -1211,9 +1266,8 @@ vk_common_CreateRenderPass2(VkDevice _device, VK_MULTIALLOC_DECL(&ma, struct vk_subpass_attachment, subpass_attachments, subpass_attachment_count); - if (!vk_object_multizalloc(device, &ma, pAllocator, - VK_OBJECT_TYPE_RENDER_PASS)) - return VK_ERROR_OUT_OF_HOST_MEMORY; + if (!vk_object_multizalloc(device, &ma, alloc, VK_OBJECT_TYPE_RENDER_PASS)) + return NULL; pass->attachment_count = pCreateInfo->attachmentCount; pass->attachments = 
attachments; @@ -1626,6 +1680,22 @@ vk_common_CreateRenderPass2(VkDevice _device, pass->fragment_density_map.layout = VK_IMAGE_LAYOUT_UNDEFINED; } + return pass; +} + +VKAPI_ATTR VkResult VKAPI_CALL +vk_common_CreateRenderPass2(VkDevice _device, + const VkRenderPassCreateInfo2 *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkRenderPass *pRenderPass) +{ + VK_FROM_HANDLE(vk_device, device, _device); + + struct vk_render_pass *pass = + vk_render_pass_create(device, pCreateInfo, pAllocator); + if (pass == NULL) + return VK_ERROR_OUT_OF_HOST_MEMORY; + *pRenderPass = vk_render_pass_to_handle(pass); return VK_SUCCESS; diff --git a/src/vulkan/runtime/vk_render_pass.h b/src/vulkan/runtime/vk_render_pass.h index c42970a6a67..9df5aabe048 100644 --- a/src/vulkan/runtime/vk_render_pass.h +++ b/src/vulkan/runtime/vk_render_pass.h @@ -324,6 +324,19 @@ struct vk_render_pass { VK_DEFINE_NONDISP_HANDLE_CASTS(vk_render_pass, base, VkRenderPass, VK_OBJECT_TYPE_RENDER_PASS); +struct vk_render_pass * +vk_render_pass_create(struct vk_device *device, + const VkRenderPassCreateInfo2 *pCreateInfo, + const VkAllocationCallbacks *alloc); + +/** Attempts to merge the given subpass index into the previous subpass + * + * Returns true if the merge succeeded. 
+ */ +bool +vk_render_pass_try_merge_subpass(struct vk_render_pass *pass, + uint32_t subpass_idx); + /** Returns the VkPipelineRenderingCreateInfo for a graphics pipeline * * For render-pass-free drivers, this can be used in the implementation of From 4330ebe4e092f9cf174c84240a25ec063b7fd76b Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Wed, 10 Dec 2025 15:47:54 -0500 Subject: [PATCH 24/41] nvk: Use MESA_VK_RP_ATTACHMENT_COLOR_BIT() --- src/nouveau/vulkan/nvk_cmd_draw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nouveau/vulkan/nvk_cmd_draw.c b/src/nouveau/vulkan/nvk_cmd_draw.c index 35a59c618d3..c88db1943bf 100644 --- a/src/nouveau/vulkan/nvk_cmd_draw.c +++ b/src/nouveau/vulkan/nvk_cmd_draw.c @@ -3437,7 +3437,7 @@ nvk_flush_cb_state(struct nvk_cmd_buffer *cmd) uint32_t rp_att_write_mask = 0x0; for (uint8_t a = 0; a < MESA_VK_MAX_COLOR_ATTACHMENTS; a++) { - if (dyn->rp.attachments & (MESA_VK_RP_ATTACHMENT_COLOR_0_BIT << a)) + if (dyn->rp.attachments & MESA_VK_RP_ATTACHMENT_COLOR_BIT(a)) rp_att_write_mask |= 0xf << (4 * a); } From 54010d109d1fe082800233e455d48bb662717b43 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Thu, 11 Dec 2025 16:30:47 -0500 Subject: [PATCH 25/41] nvk: Apply attachment remap state in BeginRendering() --- src/nouveau/vulkan/nvk_cmd_draw.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/nouveau/vulkan/nvk_cmd_draw.c b/src/nouveau/vulkan/nvk_cmd_draw.c index c88db1943bf..16aa415122c 100644 --- a/src/nouveau/vulkan/nvk_cmd_draw.c +++ b/src/nouveau/vulkan/nvk_cmd_draw.c @@ -749,6 +749,11 @@ nvk_cmd_buffer_begin_graphics(struct nvk_cmd_buffer *cmd, render->stencil_att.vk_format = inheritance_info->stencilAttachmentFormat; + const VkRenderingAttachmentRemapInfoMESA *rar_info = + vk_find_struct_const(inheritance_info->pNext, + RENDERING_ATTACHMENT_REMAP_INFO_MESA); + vk_cmd_set_rendering_attachment_remap(&cmd->vk, rar_info); + const VkRenderingAttachmentLocationInfoKHR 
att_loc_info_default = { .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_LOCATION_INFO_KHR, .colorAttachmentCount = inheritance_info->colorAttachmentCount, @@ -1404,8 +1409,17 @@ nvk_CmdBeginRendering(VkCommandBuffer commandBuffer, if (sample_layout != NIL_SAMPLE_LAYOUT_INVALID) nvk_cmd_set_sample_layout(cmd, sample_layout); - if (render->flags & VK_RENDERING_RESUMING_BIT) + const VkRenderingAttachmentRemapInfoMESA *rar_info = + vk_find_struct_const(pRenderingInfo->pNext, + RENDERING_ATTACHMENT_REMAP_INFO_MESA); + + if (render->flags & VK_RENDERING_RESUMING_BIT) { + vk_cmd_set_rendering_attachment_remap(&cmd->vk, rar_info); return; + } + + /* We don't want a previous remap messing up our clears */ + vk_cmd_set_rendering_attachment_remap(&cmd->vk, NULL); for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) { const struct nvk_image_view *iview = render->color_att[i].iview; @@ -1477,7 +1491,11 @@ nvk_CmdBeginRendering(VkCommandBuffer commandBuffer, P_NV9097_SET_RENDER_ENABLE_OVERRIDE(p, MODE_USE_RENDER_ENABLE); } - /* TODO: Attachment clears */ + /* This needs to be set after the clears because the clears that are part + * of CmdBeginRendering() apply to the entire render, not just the + * attachments selected by the remap. 
+ */ + vk_cmd_set_rendering_attachment_remap(&cmd->vk, rar_info); } VKAPI_ATTR void VKAPI_CALL From 8497e53cac1759f844bf1886b5ca40f695e326b9 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Thu, 11 Dec 2025 16:30:14 -0500 Subject: [PATCH 26/41] nvk: Handle color attachment remapping --- src/nouveau/vulkan/nvk_cmd_clear.c | 22 ++++++++-- src/nouveau/vulkan/nvk_cmd_draw.c | 68 ++++++++++++++++++++++++------ 2 files changed, 74 insertions(+), 16 deletions(-) diff --git a/src/nouveau/vulkan/nvk_cmd_clear.c b/src/nouveau/vulkan/nvk_cmd_clear.c index 6352678a430..7538b684b70 100644 --- a/src/nouveau/vulkan/nvk_cmd_clear.c +++ b/src/nouveau/vulkan/nvk_cmd_clear.c @@ -142,6 +142,21 @@ emit_clear_rects(struct nvk_cmd_buffer *cmd, } } +static uint32_t +get_color_target_index(const struct vk_dynamic_graphics_state *dyn, + uint32_t attachment) +{ + if (attachment == VK_ATTACHMENT_UNUSED) + return VK_ATTACHMENT_UNUSED; + + for (uint8_t a = 0; a < MESA_VK_MAX_COLOR_ATTACHMENTS; a++) { + if (dyn->rp.color_attachment_remap[a] == attachment) + return a; + } + + return VK_ATTACHMENT_UNUSED; +} + VKAPI_ATTR void VKAPI_CALL nvk_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, @@ -189,7 +204,9 @@ nvk_CmdClearAttachments(VkCommandBuffer commandBuffer, if (pAttachments[i].aspectMask != VK_IMAGE_ASPECT_COLOR_BIT) continue; - if (pAttachments[i].colorAttachment == VK_ATTACHMENT_UNUSED) + const uint32_t ct = + get_color_target_index(dyn, pAttachments[i].colorAttachment); + if (ct == VK_ATTACHMENT_UNUSED) continue; VkClearColorValue color = pAttachments[i].clearValue.color; @@ -201,8 +218,7 @@ nvk_CmdClearAttachments(VkCommandBuffer commandBuffer, P_NV9097_SET_COLOR_CLEAR_VALUE(p, 2, color.uint32[2]); P_NV9097_SET_COLOR_CLEAR_VALUE(p, 3, color.uint32[3]); - emit_clear_rects(cmd, pAttachments[i].colorAttachment, - clear_depth, clear_stencil, rectCount, pRects); + emit_clear_rects(cmd, ct, clear_depth, clear_stencil, rectCount, pRects); /* We only need to clear 
depth/stencil once */ clear_depth = clear_stencil = false; diff --git a/src/nouveau/vulkan/nvk_cmd_draw.c b/src/nouveau/vulkan/nvk_cmd_draw.c index 16aa415122c..ad2a298e3df 100644 --- a/src/nouveau/vulkan/nvk_cmd_draw.c +++ b/src/nouveau/vulkan/nvk_cmd_draw.c @@ -3394,6 +3394,33 @@ nvk_mme_set_write_mask(struct mme_builder *b) mme_emit(b, common_mask); } +static const struct vk_color_blend_attachment_state * +get_blend_attachment_state(const struct vk_dynamic_graphics_state *dyn, + uint32_t a) +{ + uint32_t remap = dyn->rp.color_attachment_remap[a]; + return remap == MESA_VK_ATTACHMENT_UNUSED ? + NULL : &dyn->cb.attachments[remap]; +} + +static bool +get_color_write_enabled(const struct vk_dynamic_graphics_state *dyn, + uint32_t a) +{ + uint32_t remap = dyn->rp.color_attachment_remap[a]; + return remap == MESA_VK_ATTACHMENT_UNUSED ? + false : (dyn->cb.color_write_enables & BITFIELD_BIT(remap)); +} + +static uint8_t +get_color_attachment_location(const struct vk_dynamic_graphics_state *dyn, + uint32_t a) +{ + uint32_t remap = dyn->rp.color_attachment_remap[a]; + return remap == MESA_VK_ATTACHMENT_UNUSED ? 
+ MESA_VK_ATTACHMENT_UNUSED : dyn->cal.color_map[remap]; +} + static void nvk_flush_cb_state(struct nvk_cmd_buffer *cmd) { @@ -3412,16 +3439,23 @@ nvk_flush_cb_state(struct nvk_cmd_buffer *cmd) P_IMMD(p, NV9097, SET_LOGIC_OP_FUNC, func); } - if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_ENABLES)) { + if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_ENABLES) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RP_REMAP)) { for (uint8_t a = 0; a < render->color_att_count; a++) { - P_IMMD(p, NV9097, SET_BLEND(a), dyn->cb.attachments[a].blend_enable); + const struct vk_color_blend_attachment_state *att = + get_blend_attachment_state(dyn, a); + P_IMMD(p, NV9097, SET_BLEND(a), att != NULL && att->blend_enable); } } - if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS)) { + if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RP_REMAP)) { for (uint8_t a = 0; a < render->color_att_count; a++) { const struct vk_color_blend_attachment_state *att = - &dyn->cb.attachments[a]; + get_blend_attachment_state(dyn, a); + if (att == NULL) + continue; + P_MTHD(p, NV9097, SET_BLEND_PER_TARGET_SEPARATE_FOR_ALPHA(a)); P_NV9097_SET_BLEND_PER_TARGET_SEPARATE_FOR_ALPHA(p, a, ENABLE_TRUE); P_NV9097_SET_BLEND_PER_TARGET_COLOR_OP(p, a, @@ -3442,16 +3476,20 @@ nvk_flush_cb_state(struct nvk_cmd_buffer *cmd) if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_WRITE_MASKS) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RP_ATTACHMENTS) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RP_REMAP) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_COLOR_ATTACHMENT_MAP)) { uint32_t color_write_enables = 0x0; for (uint8_t a = 0; a < render->color_att_count; a++) { - if (dyn->cb.color_write_enables & BITFIELD_BIT(a)) + if (get_color_write_enabled(dyn, a)) color_write_enables |= 0xf << (4 * a); } uint32_t cb_att_write_mask = 0x0; - for (uint8_t a = 0; a < render->color_att_count; a++) - 
cb_att_write_mask |= dyn->cb.attachments[a].write_mask << (a * 4); + for (uint8_t a = 0; a < render->color_att_count; a++) { + const struct vk_color_blend_attachment_state *att = + get_blend_attachment_state(dyn, a); + cb_att_write_mask |= (att != NULL ? att->write_mask : 0) << (a * 4); + } uint32_t rp_att_write_mask = 0x0; for (uint8_t a = 0; a < MESA_VK_MAX_COLOR_ATTACHMENTS; a++) { @@ -3461,7 +3499,8 @@ nvk_flush_cb_state(struct nvk_cmd_buffer *cmd) uint32_t att_has_loc_mask = 0x0; for (uint8_t a = 0; a < MESA_VK_MAX_COLOR_ATTACHMENTS; a++) { - if (dyn->cal.color_map[a] != MESA_VK_ATTACHMENT_UNUSED) + uint8_t loc = get_color_attachment_location(dyn, a); + if (loc != MESA_VK_ATTACHMENT_UNUSED) att_has_loc_mask |= 0xf << (4 * a); } @@ -3473,19 +3512,22 @@ nvk_flush_cb_state(struct nvk_cmd_buffer *cmd) att_has_loc_mask); } - if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_COLOR_ATTACHMENT_MAP)) { + if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RP_ATTACHMENTS) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RP_REMAP) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_COLOR_ATTACHMENT_MAP)) { int8_t loc_att[NVK_MAX_RTS] = { -1, -1, -1, -1, -1, -1, -1, -1}; uint8_t max_loc = 0; uint32_t att_used = 0; for (uint8_t a = 0; a < render->color_att_count; a++) { - if (dyn->cal.color_map[a] == MESA_VK_ATTACHMENT_UNUSED) + uint8_t loc = get_color_attachment_location(dyn, a); + if (loc == MESA_VK_ATTACHMENT_UNUSED) continue; att_used |= BITFIELD_BIT(a); - assert(dyn->cal.color_map[a] < NVK_MAX_RTS); - loc_att[dyn->cal.color_map[a]] = a; - max_loc = MAX2(max_loc, dyn->cal.color_map[a]); + assert(loc < NVK_MAX_RTS); + loc_att[loc] = a; + max_loc = MAX2(max_loc, loc); } for (uint8_t l = 0; l < NVK_MAX_RTS; l++) { From 033ff8b1243169cffedaca62941c8445628e993b Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Thu, 11 Dec 2025 16:31:15 -0500 Subject: [PATCH 27/41] nvk: Handle depthStencilAttachmentEnable --- src/nouveau/vulkan/nvk_cmd_draw.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 
deletions(-) diff --git a/src/nouveau/vulkan/nvk_cmd_draw.c b/src/nouveau/vulkan/nvk_cmd_draw.c index ad2a298e3df..2069e262bc9 100644 --- a/src/nouveau/vulkan/nvk_cmd_draw.c +++ b/src/nouveau/vulkan/nvk_cmd_draw.c @@ -3197,14 +3197,18 @@ nvk_flush_ds_state(struct nvk_cmd_buffer *cmd) const struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state; - if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE)) { + if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RP_REMAP)) { bool enable = dyn->ds.depth.test_enable && + dyn->rp.depth_stencil_attachment_enable && render->depth_att.vk_format != VK_FORMAT_UNDEFINED; P_IMMD(p, NV9097, SET_DEPTH_TEST, enable); } - if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE)) { + if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RP_REMAP)) { bool enable = dyn->ds.depth.write_enable && + dyn->rp.depth_stencil_attachment_enable && render->depth_att.vk_format != VK_FORMAT_UNDEFINED; P_IMMD(p, NV9097, SET_DEPTH_WRITE, enable); } @@ -3226,8 +3230,10 @@ nvk_flush_ds_state(struct nvk_cmd_buffer *cmd) P_NV9097_SET_DEPTH_BOUNDS_MAX(p, fui(dyn->ds.depth.bounds_test.max)); } - if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE)) { + if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RP_REMAP)) { bool enable = dyn->ds.stencil.test_enable && + dyn->rp.depth_stencil_attachment_enable && render->stencil_att.vk_format != VK_FORMAT_UNDEFINED; P_IMMD(p, NV9097, SET_STENCIL_TEST, enable); } From 2eadf741832ec8677f2a5341be5e5d5b8b9c3f39 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Wed, 10 Dec 2025 12:46:01 -0500 Subject: [PATCH 28/41] panfrost: Use the right format for discarded color targets Even if the color target is eventually discarded, we still want to use the right internal format so we get the correct 
precision for color target reads. --- src/panfrost/lib/pan_desc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/panfrost/lib/pan_desc.c b/src/panfrost/lib/pan_desc.c index bfd2d572b3f..95e5d176f76 100644 --- a/src/panfrost/lib/pan_desc.c +++ b/src/panfrost/lib/pan_desc.c @@ -931,7 +931,12 @@ pan_emit_rt(const struct pan_fb_info *fb, unsigned layer_idx, unsigned idx, cfg.internal_buffer_offset = cbuf_offset; cfg.clear = rt_clear(&fb->rts[idx]); cfg.dithering_enable = true; - cfg.internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A8; + if (rt) { + get_rt_formats(rt->format, &cfg.writeback_format, + &cfg.internal_format, &cfg.swizzle); + } else { + cfg.internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A8; + } #if PAN_ARCH >= 7 cfg.writeback_block_format = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED; #endif From f79ddd08e74834c85b1d0616e63d0e0da0d890da Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Wed, 10 Dec 2025 11:27:17 -0500 Subject: [PATCH 29/41] panvk: Implement STORE_OP_DONT_CARE --- src/panfrost/vulkan/panvk_vX_cmd_draw.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/panfrost/vulkan/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/panvk_vX_cmd_draw.c index ab6bbe55d8a..8ab1c184562 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_draw.c @@ -48,6 +48,12 @@ att_set_clear_preload(const VkRenderingAttachmentInfo *att, bool *clear, bool *p } } +static void +att_set_discard(const VkRenderingAttachmentInfo *att, bool *discard) +{ + *discard = att->storeOp != VK_ATTACHMENT_STORE_OP_STORE; +} + static void render_state_set_color_attachment(struct panvk_cmd_buffer *cmdbuf, const VkRenderingAttachmentInfo *att, @@ -97,6 +103,7 @@ render_state_set_color_attachment(struct panvk_cmd_buffer *cmdbuf, att_set_clear_preload(att, &fbinfo->rts[index].clear, &fbinfo->rts[index].preload); + att_set_discard(att, &fbinfo->rts[index].discard); if (att->resolveMode != VK_RESOLVE_MODE_NONE) { 
struct panvk_resolve_attachment *resolve_info = @@ -159,6 +166,7 @@ render_state_set_z_attachment(struct panvk_cmd_buffer *cmdbuf, fbinfo->zs.clear_value.depth = att->clearValue.depthStencil.depth; att_set_clear_preload(att, &fbinfo->zs.clear.z, &fbinfo->zs.preload.z); + att_set_discard(att, &fbinfo->zs.discard.z); if (att->resolveMode != VK_RESOLVE_MODE_NONE) { struct panvk_resolve_attachment *resolve_info = @@ -241,6 +249,7 @@ render_state_set_s_attachment(struct panvk_cmd_buffer *cmdbuf, fbinfo->zs.clear_value.stencil = att->clearValue.depthStencil.stencil; att_set_clear_preload(att, &fbinfo->zs.clear.s, &fbinfo->zs.preload.s); + att_set_discard(att, &fbinfo->zs.discard.s); if (att->resolveMode != VK_RESOLVE_MODE_NONE) { struct panvk_resolve_attachment *resolve_info = From 2591ea65dbe226528baf666578de6c9f34384049 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Wed, 10 Dec 2025 10:12:53 -0500 Subject: [PATCH 30/41] HACK: panvk: Stop splitting the tile job --- src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c index 804add2d7b7..7eb0e9ec5ae 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c @@ -406,15 +406,15 @@ should_split_render_pass(const uint32_t wait_masks[static PANVK_SUBQUEUE_COUNT], BITFIELD_BIT(PANVK_SUBQUEUE_FRAGMENT)) return true; - /* split if the fragment subqueue self-waits with a feedback loop, because - * we lower subpassLoad to texelFetch - */ - if ((wait_masks[PANVK_SUBQUEUE_FRAGMENT] & - BITFIELD_BIT(PANVK_SUBQUEUE_FRAGMENT)) && - (src_access & (VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | - VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)) && - (dst_access & VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT)) - return true; +// /* split if the fragment subqueue self-waits with a feedback loop, because +// * we lower 
subpassLoad to texelFetch +// */ +// if ((wait_masks[PANVK_SUBQUEUE_FRAGMENT] & +// BITFIELD_BIT(PANVK_SUBQUEUE_FRAGMENT)) && +// (src_access & (VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | +// VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)) && +// (dst_access & VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT)) +// return true; return false; } From dd7e168a2ed202e5fb12e2df5be8ae54f1c02a38 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Wed, 10 Dec 2025 12:44:54 -0500 Subject: [PATCH 31/41] HACK: panvk: More discard --- src/panfrost/lib/pan_desc.c | 6 +++--- src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/panfrost/lib/pan_desc.c b/src/panfrost/lib/pan_desc.c index 95e5d176f76..9ac174e0763 100644 --- a/src/panfrost/lib/pan_desc.c +++ b/src/panfrost/lib/pan_desc.c @@ -703,7 +703,7 @@ GENX(pan_emit_afbc_color_attachment)(const struct pan_fb_info *fb, assert(!pan_format_is_yuv(iview->format)); pan_cast_and_pack(payload, AFBC_RGB_RENDER_TARGET, cfg) { rt_common_cfg(rt, cbuf_offset, fb->tile_size, cfg); - cfg.write_enable = true; + cfg.write_enable = !rt->discard; get_rt_formats(iview->format, &cfg.writeback_format, &cfg.internal_format, &cfg.swizzle); @@ -768,7 +768,7 @@ GENX(pan_emit_u_tiled_color_attachment)(const struct pan_fb_info *fb, assert(!pan_format_is_yuv(iview->format)); pan_cast_and_pack(payload, RGB_RENDER_TARGET, cfg) { rt_common_cfg(rt, cbuf_offset, fb->tile_size, cfg); - cfg.write_enable = true; + cfg.write_enable = !rt->discard; cfg.writeback_block_format = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED; get_rt_formats(iview->format, &cfg.writeback_format, &cfg.internal_format, &cfg.swizzle); @@ -797,7 +797,7 @@ GENX(pan_emit_linear_color_attachment)(const struct pan_fb_info *fb, assert(!pan_format_is_yuv(iview->format)); pan_cast_and_pack(payload, RGB_RENDER_TARGET, cfg) { rt_common_cfg(rt, cbuf_offset, fb->tile_size, cfg); - cfg.write_enable = true; + cfg.write_enable = !rt->discard; 
cfg.writeback_block_format = MALI_BLOCK_FORMAT_LINEAR; get_rt_formats(iview->format, &cfg.writeback_format, &cfg.internal_format, &cfg.swizzle); diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c index f0d81a63cd4..c85d2ff750c 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c @@ -1228,10 +1228,10 @@ prepare_incremental_rendering_fbinfos( struct pan_fb_info *ir_fb = &ir_fbinfos[PANVK_IR_FIRST_PASS]; memcpy(ir_fb, fbinfo, sizeof(*ir_fb)); - for (unsigned i = 0; i < fbinfo->rt_count; i++) - ir_fb->rts[i].discard = false; - ir_fb->zs.discard.z = false; - ir_fb->zs.discard.s = false; +// for (unsigned i = 0; i < fbinfo->rt_count; i++) +// ir_fb->rts[i].discard = false; +// ir_fb->zs.discard.z = false; +// ir_fb->zs.discard.s = false; /* Subsequent incremental rendering passes: preload old content and don't * discard result */ From 6ff7c35783a79a366b5629adb53d63d6b3917a09 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Thu, 11 Dec 2025 18:50:01 -0500 Subject: [PATCH 32/41] Revert "panvk: Implement STORE_OP_DONT_CARE" This reverts commit d3c2f586caafde00a24a56ada96f5e6c048316cd. 
--- src/panfrost/vulkan/panvk_vX_cmd_draw.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/panfrost/vulkan/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/panvk_vX_cmd_draw.c index 8ab1c184562..ab6bbe55d8a 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_draw.c @@ -48,12 +48,6 @@ att_set_clear_preload(const VkRenderingAttachmentInfo *att, bool *clear, bool *p } } -static void -att_set_discard(const VkRenderingAttachmentInfo *att, bool *discard) -{ - *discard = att->storeOp != VK_ATTACHMENT_STORE_OP_STORE; -} - static void render_state_set_color_attachment(struct panvk_cmd_buffer *cmdbuf, const VkRenderingAttachmentInfo *att, @@ -103,7 +97,6 @@ render_state_set_color_attachment(struct panvk_cmd_buffer *cmdbuf, att_set_clear_preload(att, &fbinfo->rts[index].clear, &fbinfo->rts[index].preload); - att_set_discard(att, &fbinfo->rts[index].discard); if (att->resolveMode != VK_RESOLVE_MODE_NONE) { struct panvk_resolve_attachment *resolve_info = @@ -166,7 +159,6 @@ render_state_set_z_attachment(struct panvk_cmd_buffer *cmdbuf, fbinfo->zs.clear_value.depth = att->clearValue.depthStencil.depth; att_set_clear_preload(att, &fbinfo->zs.clear.z, &fbinfo->zs.preload.z); - att_set_discard(att, &fbinfo->zs.discard.z); if (att->resolveMode != VK_RESOLVE_MODE_NONE) { struct panvk_resolve_attachment *resolve_info = @@ -249,7 +241,6 @@ render_state_set_s_attachment(struct panvk_cmd_buffer *cmdbuf, fbinfo->zs.clear_value.stencil = att->clearValue.depthStencil.stencil; att_set_clear_preload(att, &fbinfo->zs.clear.s, &fbinfo->zs.preload.s); - att_set_discard(att, &fbinfo->zs.discard.s); if (att->resolveMode != VK_RESOLVE_MODE_NONE) { struct panvk_resolve_attachment *resolve_info = From 454c76d3bed20d45b76f8997216f0f050108ed36 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Thu, 11 Dec 2025 18:50:09 -0500 Subject: [PATCH 33/41] Revert "HACK: panvk: Stop splitting the tile job" This reverts commit 
24ed4cc2f2a0606bee76392bccc6a7afb047399e. --- src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c index 7eb0e9ec5ae..804add2d7b7 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c @@ -406,15 +406,15 @@ should_split_render_pass(const uint32_t wait_masks[static PANVK_SUBQUEUE_COUNT], BITFIELD_BIT(PANVK_SUBQUEUE_FRAGMENT)) return true; -// /* split if the fragment subqueue self-waits with a feedback loop, because -// * we lower subpassLoad to texelFetch -// */ -// if ((wait_masks[PANVK_SUBQUEUE_FRAGMENT] & -// BITFIELD_BIT(PANVK_SUBQUEUE_FRAGMENT)) && -// (src_access & (VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | -// VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)) && -// (dst_access & VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT)) -// return true; + /* split if the fragment subqueue self-waits with a feedback loop, because + * we lower subpassLoad to texelFetch + */ + if ((wait_masks[PANVK_SUBQUEUE_FRAGMENT] & + BITFIELD_BIT(PANVK_SUBQUEUE_FRAGMENT)) && + (src_access & (VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)) && + (dst_access & VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT)) + return true; return false; } From 828a8b8fb3faa681ac511c81c5c555d8e761432d Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Thu, 11 Dec 2025 18:50:24 -0500 Subject: [PATCH 34/41] Revert "HACK: panvk: More discard" This reverts commit 47ce3983efd2674501b1764a2979c43060e9d0bd. 
--- src/panfrost/lib/pan_desc.c | 6 +++--- src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/panfrost/lib/pan_desc.c b/src/panfrost/lib/pan_desc.c index 9ac174e0763..95e5d176f76 100644 --- a/src/panfrost/lib/pan_desc.c +++ b/src/panfrost/lib/pan_desc.c @@ -703,7 +703,7 @@ GENX(pan_emit_afbc_color_attachment)(const struct pan_fb_info *fb, assert(!pan_format_is_yuv(iview->format)); pan_cast_and_pack(payload, AFBC_RGB_RENDER_TARGET, cfg) { rt_common_cfg(rt, cbuf_offset, fb->tile_size, cfg); - cfg.write_enable = !rt->discard; + cfg.write_enable = true; get_rt_formats(iview->format, &cfg.writeback_format, &cfg.internal_format, &cfg.swizzle); @@ -768,7 +768,7 @@ GENX(pan_emit_u_tiled_color_attachment)(const struct pan_fb_info *fb, assert(!pan_format_is_yuv(iview->format)); pan_cast_and_pack(payload, RGB_RENDER_TARGET, cfg) { rt_common_cfg(rt, cbuf_offset, fb->tile_size, cfg); - cfg.write_enable = !rt->discard; + cfg.write_enable = true; cfg.writeback_block_format = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED; get_rt_formats(iview->format, &cfg.writeback_format, &cfg.internal_format, &cfg.swizzle); @@ -797,7 +797,7 @@ GENX(pan_emit_linear_color_attachment)(const struct pan_fb_info *fb, assert(!pan_format_is_yuv(iview->format)); pan_cast_and_pack(payload, RGB_RENDER_TARGET, cfg) { rt_common_cfg(rt, cbuf_offset, fb->tile_size, cfg); - cfg.write_enable = !rt->discard; + cfg.write_enable = true; cfg.writeback_block_format = MALI_BLOCK_FORMAT_LINEAR; get_rt_formats(iview->format, &cfg.writeback_format, &cfg.internal_format, &cfg.swizzle); diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c index c85d2ff750c..f0d81a63cd4 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c @@ -1228,10 +1228,10 @@ prepare_incremental_rendering_fbinfos( struct pan_fb_info *ir_fb = &ir_fbinfos[PANVK_IR_FIRST_PASS]; memcpy(ir_fb, 
fbinfo, sizeof(*ir_fb)); -// for (unsigned i = 0; i < fbinfo->rt_count; i++) -// ir_fb->rts[i].discard = false; -// ir_fb->zs.discard.z = false; -// ir_fb->zs.discard.s = false; + for (unsigned i = 0; i < fbinfo->rt_count; i++) + ir_fb->rts[i].discard = false; + ir_fb->zs.discard.z = false; + ir_fb->zs.discard.s = false; /* Subsequent incremental rendering passes: preload old content and don't * discard result */ From 2307cc46b0a1920f4a5647fefce8653d34e4d310 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Fri, 12 Dec 2025 13:18:32 -0500 Subject: [PATCH 35/41] panvk: Set rendering remap state on BeginRendering/CommandBuffer() --- src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c | 5 +++++ src/panfrost/vulkan/panvk_vX_cmd_draw.c | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c index f0d81a63cd4..5bede0db98a 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c @@ -2914,6 +2914,11 @@ panvk_per_arch(cmd_inherit_render_state)( att_loc_info = &att_loc_info_default; vk_cmd_set_rendering_attachment_locations(&cmdbuf->vk, att_loc_info); + + const VkRenderingAttachmentRemapInfoMESA *rar_info = + vk_find_struct_const(inheritance_info->pNext, + RENDERING_ATTACHMENT_REMAP_INFO_MESA); + vk_cmd_set_rendering_attachment_remap(&cmdbuf->vk, rar_info); } VKAPI_ATTR void VKAPI_CALL diff --git a/src/panfrost/vulkan/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/panvk_vX_cmd_draw.c index ab6bbe55d8a..fe2494d7904 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_draw.c @@ -354,6 +354,11 @@ panvk_per_arch(cmd_init_render_state)(struct panvk_cmd_buffer *cmdbuf, } assert(fbinfo->width && fbinfo->height); + + const VkRenderingAttachmentRemapInfoMESA *rar_info = + vk_find_struct_const(pRenderingInfo->pNext, + RENDERING_ATTACHMENT_REMAP_INFO_MESA); + vk_cmd_set_rendering_attachment_remap(&cmdbuf->vk, rar_info); 
} void From 7d6b44f4d42abd6c071f11ea1d0f944ef77527d1 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Fri, 12 Dec 2025 10:42:43 -0500 Subject: [PATCH 36/41] panvk: Handle color attachment remapping Getting color attachment remapping right is mostly about sorting out three different indices: Render target, API attachment, and shader location. Fortunately for us, since panvk uses meta to implement CmdClearAttachments(), all we really have to do is fix up the blend descriptors and it sorts out everything. --- src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c | 1 + src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c | 1 + src/panfrost/vulkan/panvk_vX_blend.c | 86 +++++++++++---------- 3 files changed, 49 insertions(+), 39 deletions(-) diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c index 5bede0db98a..6a0f0c8ca29 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c @@ -652,6 +652,7 @@ prepare_blend(struct panvk_cmd_buffer *cmdbuf) dyn_gfx_state_dirty(cmdbuf, CB_BLEND_EQUATIONS) || dyn_gfx_state_dirty(cmdbuf, CB_WRITE_MASKS) || dyn_gfx_state_dirty(cmdbuf, CB_BLEND_CONSTANTS) || + dyn_gfx_state_dirty(cmdbuf, RP_REMAP) || dyn_gfx_state_dirty(cmdbuf, COLOR_ATTACHMENT_MAP) || fs_user_dirty(cmdbuf) || gfx_state_dirty(cmdbuf, RENDER_STATE); diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c index e381fd614a8..5ecb05e5a8b 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c @@ -225,6 +225,7 @@ panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf, dyn_gfx_state_dirty(cmdbuf, MS_SAMPLE_MASK) || dyn_gfx_state_dirty(cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE) || dyn_gfx_state_dirty(cmdbuf, MS_ALPHA_TO_ONE_ENABLE) || + dyn_gfx_state_dirty(cmdbuf, RP_REMAP) || gfx_state_dirty(cmdbuf, FS) || gfx_state_dirty(cmdbuf, OQ) || gfx_state_dirty(cmdbuf, RENDER_STATE); diff --git 
a/src/panfrost/vulkan/panvk_vX_blend.c b/src/panfrost/vulkan/panvk_vX_blend.c index 1a2853a6354..d1622af9fbd 100644 --- a/src/panfrost/vulkan/panvk_vX_blend.c +++ b/src/panfrost/vulkan/panvk_vX_blend.c @@ -326,7 +326,7 @@ panvk_per_arch(blend_emit_descs)(struct panvk_cmd_buffer *cmdbuf, uint64_t blend_shaders[8] = {}; /* All bits set to one encodes unused fixed-function blend constant. */ unsigned ff_blend_constant = ~0; - uint8_t remap_catts[MAX_RTS] = { + uint8_t loc_rt[MAX_RTS] = { MESA_VK_ATTACHMENT_UNUSED, MESA_VK_ATTACHMENT_UNUSED, MESA_VK_ATTACHMENT_UNUSED, MESA_VK_ATTACHMENT_UNUSED, MESA_VK_ATTACHMENT_UNUSED, MESA_VK_ATTACHMENT_UNUSED, @@ -334,41 +334,50 @@ panvk_per_arch(blend_emit_descs)(struct panvk_cmd_buffer *cmdbuf, }; uint32_t blend_count = MAX2(cmdbuf->state.gfx.render.fb.info.rt_count, 1); - static_assert(ARRAY_SIZE(remap_catts) <= ARRAY_SIZE(cal->color_map), - "vk_color_attachment_location_state::color_map is too small"); - - for (uint32_t i = 0; i < ARRAY_SIZE(remap_catts); i++) { - if (cal->color_map[i] != MESA_VK_ATTACHMENT_UNUSED) { - assert(cal->color_map[i] < MAX_RTS); - remap_catts[cal->color_map[i]] = i; - } - } - memset(blend_info, 0, sizeof(*blend_info)); - for (uint8_t i = 0; i < cb->attachment_count; i++) { - struct pan_blend_rt_state *rt = &bs.rts[i]; - if (cal->color_map[i] == MESA_VK_ATTACHMENT_UNUSED) { + for (uint8_t rt_idx = 0; rt_idx < MAX_RTS; rt_idx++) { + struct pan_blend_rt_state *rt = &bs.rts[rt_idx]; + + /* This is the API-level attachment for things other than the bound + * render targets. In particular, we use this index for blend + * attachments and color write enables. + */ + const uint8_t att = dyns->rp.color_attachment_remap[rt_idx]; + if (att == MESA_VK_ATTACHMENT_UNUSED) { + rt->equation.color_mask = 0; + continue; + } + assert(att < MAX_RTS); + + /* This is the location inside the shader where this color attachment + * will be bound. 
+ */ + const uint8_t loc = cal->color_map[att]; + if (loc == MESA_VK_ATTACHMENT_UNUSED) { + rt->equation.color_mask = 0; + continue; + } + assert(loc < MAX_RTS); + loc_rt[loc] = rt_idx; + + if (!(cb->color_write_enables & BITFIELD_BIT(att))) { rt->equation.color_mask = 0; continue; } - if (!(cb->color_write_enables & BITFIELD_BIT(i))) { + if (color_attachment_formats[rt_idx] == VK_FORMAT_UNDEFINED) { rt->equation.color_mask = 0; continue; } - if (color_attachment_formats[i] == VK_FORMAT_UNDEFINED) { + if (!cb->attachments[att].write_mask) { rt->equation.color_mask = 0; continue; } - if (!cb->attachments[i].write_mask) { - rt->equation.color_mask = 0; - continue; - } - - rt->format = vk_format_to_pipe_format(color_attachment_formats[i]); + rt->format = vk_format_to_pipe_format(color_attachment_formats[rt_idx]); + rt->nr_samples = color_attachment_samples[rt_idx]; /* Disable blending for LOGICOP_NOOP unless the format is float/srgb */ if (bs.logicop_enable && bs.logicop_func == PIPE_LOGICOP_NOOP && @@ -378,21 +387,20 @@ panvk_per_arch(blend_emit_descs)(struct panvk_cmd_buffer *cmdbuf, continue; } - rt->nr_samples = color_attachment_samples[i]; - rt->equation.blend_enable = cb->attachments[i].blend_enable; - rt->equation.color_mask = cb->attachments[i].write_mask; + rt->equation.blend_enable = cb->attachments[att].blend_enable; + rt->equation.color_mask = cb->attachments[att].write_mask; rt->equation.rgb_func = - vk_blend_op_to_pipe(cb->attachments[i].color_blend_op); + vk_blend_op_to_pipe(cb->attachments[att].color_blend_op); rt->equation.rgb_src_factor = - vk_blend_factor_to_pipe(cb->attachments[i].src_color_blend_factor); + vk_blend_factor_to_pipe(cb->attachments[att].src_color_blend_factor); rt->equation.rgb_dst_factor = - vk_blend_factor_to_pipe(cb->attachments[i].dst_color_blend_factor); + vk_blend_factor_to_pipe(cb->attachments[att].dst_color_blend_factor); rt->equation.alpha_func = - vk_blend_op_to_pipe(cb->attachments[i].alpha_blend_op); + 
vk_blend_op_to_pipe(cb->attachments[att].alpha_blend_op); rt->equation.alpha_src_factor = - vk_blend_factor_to_pipe(cb->attachments[i].src_alpha_blend_factor); + vk_blend_factor_to_pipe(cb->attachments[att].src_alpha_blend_factor); rt->equation.alpha_dst_factor = - vk_blend_factor_to_pipe(cb->attachments[i].dst_alpha_blend_factor); + vk_blend_factor_to_pipe(cb->attachments[att].dst_alpha_blend_factor); bool dest_has_alpha = util_format_has_alpha(rt->format); if (!dest_has_alpha) { @@ -409,12 +417,12 @@ panvk_per_arch(blend_emit_descs)(struct panvk_cmd_buffer *cmdbuf, blend_info->any_dest_read |= pan_blend_reads_dest(rt->equation); - if (blend_needs_shader(&bs, i, &ff_blend_constant)) { - nir_alu_type src0_type = fs_info->bifrost.blend[i].type; + if (blend_needs_shader(&bs, rt_idx, &ff_blend_constant)) { + nir_alu_type src0_type = fs_info->bifrost.blend[loc].type; nir_alu_type src1_type = fs_info->bifrost.blend_src1_type; VkResult result = get_blend_shader(dev, &bs, src0_type, src1_type, - i, &blend_shaders[i]); + rt_idx, &blend_shaders[rt_idx]); if (result != VK_SUCCESS) return result; @@ -429,13 +437,13 @@ panvk_per_arch(blend_emit_descs)(struct panvk_cmd_buffer *cmdbuf, ff_blend_constant = 0; /* Now that we've collected all the information, we can emit. */ - for (uint8_t i = 0; i < blend_count; i++) { - uint32_t catt_idx = remap_catts[i]; + for (uint8_t loc = 0; loc < blend_count; loc++) { + uint32_t rt_idx = loc_rt[loc]; uint64_t blend_shader = - catt_idx != MESA_VK_ATTACHMENT_UNUSED ? blend_shaders[catt_idx] : 0; + rt_idx != MESA_VK_ATTACHMENT_UNUSED ? 
blend_shaders[rt_idx] : 0; - emit_blend_desc(fs_info, fs_code, &bs, i, catt_idx, - blend_shader, ff_blend_constant, &bds[i]); + emit_blend_desc(fs_info, fs_code, &bs, loc, rt_idx, + blend_shader, ff_blend_constant, &bds[loc]); } if (blend_info->shader_loads_blend_const) From 8b793dd7d9d4f918e6b510214573ed05230da1fc Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Fri, 12 Dec 2025 12:16:02 -0500 Subject: [PATCH 37/41] panvk/csf: Respect depthStencilAttachmentEnable --- src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c index 6a0f0c8ca29..42323d55456 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c @@ -454,6 +454,8 @@ prepare_fs_driver_set(struct panvk_cmd_buffer *cmdbuf) static bool has_depth_att(struct panvk_cmd_buffer *cmdbuf) { + if (!cmdbuf->vk.dynamic_graphics_state.rp.depth_stencil_attachment_enable) + return false; return (cmdbuf->state.gfx.render.bound_attachments & MESA_VK_RP_ATTACHMENT_DEPTH_BIT) != 0; } @@ -461,6 +463,8 @@ has_depth_att(struct panvk_cmd_buffer *cmdbuf) static bool has_stencil_att(struct panvk_cmd_buffer *cmdbuf) { + if (!cmdbuf->vk.dynamic_graphics_state.rp.depth_stencil_attachment_enable) + return false; return (cmdbuf->state.gfx.render.bound_attachments & MESA_VK_RP_ATTACHMENT_STENCIL_BIT) != 0; } @@ -1799,6 +1803,7 @@ prepare_ds(struct panvk_cmd_buffer *cmdbuf, struct pan_earlyzs_state earlyzs) dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_BIAS_ENABLE) || dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_BIAS_FACTORS) || dyn_gfx_state_dirty(cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE) || + dyn_gfx_state_dirty(cmdbuf, RP_REMAP) || dyn_gfx_state_dirty(cmdbuf, INPUT_ATTACHMENT_MAP) || fs_user_dirty(cmdbuf) || gfx_state_dirty(cmdbuf, OQ); @@ -1960,6 +1965,7 @@ prepare_dcd(struct panvk_cmd_buffer *cmdbuf, dyn_gfx_state_dirty(cmdbuf, MS_SAMPLE_MASK) || 
dyn_gfx_state_dirty(cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE) || dyn_gfx_state_dirty(cmdbuf, MS_ALPHA_TO_ONE_ENABLE) || + dyn_gfx_state_dirty(cmdbuf, RP_REMAP) || /* writes_depth() uses vk_depth_stencil_state */ dyn_gfx_state_dirty(cmdbuf, DS_DEPTH_TEST_ENABLE) || dyn_gfx_state_dirty(cmdbuf, DS_DEPTH_WRITE_ENABLE) || From d13b55a917449972f381746ef0d4121e3fb318dd Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Fri, 12 Dec 2025 12:17:43 -0500 Subject: [PATCH 38/41] panvk/jm: Respect depthStencilAttachmentEnable --- src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c index 5ecb05e5a8b..bfd01f1e93d 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c @@ -92,6 +92,8 @@ is_indirect_draw(const struct panvk_draw_data *draw) static bool has_depth_att(struct panvk_cmd_buffer *cmdbuf) { + if (!cmdbuf->vk.dynamic_graphics_state.rp.depth_stencil_attachment_enable) + return false; return (cmdbuf->state.gfx.render.bound_attachments & MESA_VK_RP_ATTACHMENT_DEPTH_BIT) != 0; } @@ -99,6 +101,8 @@ has_depth_att(struct panvk_cmd_buffer *cmdbuf) static bool has_stencil_att(struct panvk_cmd_buffer *cmdbuf) { + if (!cmdbuf->vk.dynamic_graphics_state.rp.depth_stencil_attachment_enable) + return false; return (cmdbuf->state.gfx.render.bound_attachments & MESA_VK_RP_ATTACHMENT_STENCIL_BIT) != 0; } From afb4164c1437251f536c37f63d46cc024e4ff251 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Wed, 10 Dec 2025 09:59:26 -0500 Subject: [PATCH 39/41] HACK: Merge subpasses --- src/vulkan/runtime/vk_render_pass.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/vulkan/runtime/vk_render_pass.c b/src/vulkan/runtime/vk_render_pass.c index 259e39b9387..a792ccb32a6 100644 --- a/src/vulkan/runtime/vk_render_pass.c +++ b/src/vulkan/runtime/vk_render_pass.c @@ -1696,6 +1696,9 @@ vk_common_CreateRenderPass2(VkDevice 
_device, if (pass == NULL) return VK_ERROR_OUT_OF_HOST_MEMORY; + for (uint32_t s = 1; s < pass->subpass_count; s++) + vk_render_pass_try_merge_subpass(pass, s); + *pRenderPass = vk_render_pass_to_handle(pass); return VK_SUCCESS; From c0990d567ddd1f2c31c648288111335278f39532 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 15 Dec 2025 15:43:02 -0500 Subject: [PATCH 40/41] HACK: Remap at compile time --- src/panfrost/vulkan/panvk_vX_blend.c | 2 +- src/panfrost/vulkan/panvk_vX_shader.c | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/panfrost/vulkan/panvk_vX_blend.c b/src/panfrost/vulkan/panvk_vX_blend.c index d1622af9fbd..7bef529116f 100644 --- a/src/panfrost/vulkan/panvk_vX_blend.c +++ b/src/panfrost/vulkan/panvk_vX_blend.c @@ -353,7 +353,7 @@ panvk_per_arch(blend_emit_descs)(struct panvk_cmd_buffer *cmdbuf, /* This is the location inside the shader where this color attachment * will be bound. */ - const uint8_t loc = cal->color_map[att]; + const uint8_t loc = cal->color_map[rt_idx]; if (loc == MESA_VK_ATTACHMENT_UNUSED) { rt->equation.color_mask = 0; continue; diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index 1fe7f4a52f8..4ed8ded394f 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ b/src/panfrost/vulkan/panvk_vX_shader.c @@ -488,6 +488,10 @@ panvk_hash_state(struct vk_physical_device *device, _mesa_blake3_update(&blake3_ctx, &state->rp->view_mask, sizeof(state->rp->view_mask)); + if (state->rp) + _mesa_blake3_update(&blake3_ctx, state->rp->color_attachment_remap, + sizeof(state->rp->color_attachment_remap)); + if (state->ial) _mesa_blake3_update(&blake3_ctx, state->ial, sizeof(*state->ial)); } @@ -1373,6 +1377,29 @@ panvk_compile_shader(struct panvk_device *dev, if (state && state->ms && state->ms->sample_shading_enable) nir->info.fs.uses_sample_shading = true; + bool demoted_output = false; + nir_foreach_shader_out_variable(var, nir) { + if 
(var->data.location < FRAG_RESULT_DATA0) + continue; + + uint32_t loc = var->data.location - FRAG_RESULT_DATA0; + uint32_t rt; + for (rt = 0; rt < MAX_RTS; rt++) { + if (state->rp->color_attachment_remap[rt] == loc) + break; + } + if (rt < MAX_RTS) { + var->data.location = FRAG_RESULT_DATA0 + rt; + } else { + var->data.mode = nir_var_shader_temp; + demoted_output = true; + } + } + if (demoted_output) { + NIR_PASS(_, nir, nir_fixup_deref_modes); + NIR_PASS(_, nir, nir_lower_global_vars_to_local); + } + /* We need to lower input attachments before we lower descriptors */ NIR_PASS(_, nir, panvk_per_arch(nir_lower_input_attachment_loads), state, &variant->fs.input_attachment_read); From 754f4a51e7f10befd1ae2d3ab51ebbf82a46d091 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 15 Dec 2025 16:17:24 -0500 Subject: [PATCH 41/41] HACK: CPU remap attachment clears --- src/panfrost/vulkan/panvk_vX_cmd_meta.c | 33 +++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/src/panfrost/vulkan/panvk_vX_cmd_meta.c b/src/panfrost/vulkan/panvk_vX_cmd_meta.c index 970b2b78fd6..a98b855c57b 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_meta.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_meta.c @@ -235,6 +235,8 @@ panvk_per_arch(CmdClearAttachments)(VkCommandBuffer commandBuffer, const VkClearRect *pRects) { VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + const struct vk_dynamic_graphics_state *dyns = + &cmdbuf->vk.dynamic_graphics_state; struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); struct panvk_cmd_meta_graphics_save_ctx save = {0}; struct vk_meta_rendering_info render = { @@ -252,9 +254,36 @@ panvk_per_arch(CmdClearAttachments)(VkCommandBuffer commandBuffer, VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; } + STACK_ARRAY(VkClearAttachment, attachments, attachmentCount); + + uint32_t attachment_count = 0; + for (uint32_t i = 0; i < attachmentCount; i++) { + if (pAttachments[i].aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT 
| + VK_IMAGE_ASPECT_STENCIL_BIT)) { + attachments[attachment_count++] = pAttachments[i]; + continue; + } + + uint32_t rt; + for (rt = 0; rt < MAX_RTS; rt++) { + if (dyns->rp.color_attachment_remap[rt] == + pAttachments[i].colorAttachment) + break; + } + if (rt == MAX_RTS) + continue; + + attachments[attachment_count++] = (VkClearAttachment) { + .aspectMask = pAttachments[i].aspectMask, + .colorAttachment = rt, + .clearValue = pAttachments[i].clearValue, + }; + } + panvk_per_arch(cmd_meta_gfx_start)(cmdbuf, &save); - vk_meta_clear_attachments(&cmdbuf->vk, &dev->meta, &render, attachmentCount, - pAttachments, rectCount, pRects); + vk_meta_clear_attachments(&cmdbuf->vk, &dev->meta, &render, + attachment_count, attachments, + rectCount, pRects); panvk_per_arch(cmd_meta_gfx_end)(cmdbuf, &save); }