diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index e0499eb645f..33156fc2d1b 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -3521,6 +3521,17 @@ tu_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer, cmd_buffer->descriptors[i].max_dynamic_offset_size = 0; } + util_dynarray_foreach (&cmd_buffer->msrtss_color_temporaries, + struct tu_device_memory *, mem) { + tu_destroy_memory(cmd_buffer->device, *mem); + } + util_dynarray_clear(&cmd_buffer->msrtss_color_temporaries); + util_dynarray_foreach (&cmd_buffer->msrtss_depth_temporaries, + struct tu_device_memory *, mem) { + tu_destroy_memory(cmd_buffer->device, *mem); + } + util_dynarray_clear(&cmd_buffer->msrtss_depth_temporaries); + u_trace_fini(&cmd_buffer->trace); u_trace_init(&cmd_buffer->trace, &cmd_buffer->device->trace_context); u_trace_fini(&cmd_buffer->rp_trace); @@ -5869,7 +5880,7 @@ tu_CmdBeginRenderPass2(VkCommandBuffer commandBuffer, vk_multialloc_add(&ma, &cmd->state.attachments, const struct tu_image_view *, pass->attachment_count); vk_multialloc_add(&ma, &cmd->state.clear_values, VkClearValue, - pRenderPassBegin->clearValueCount); + pass->attachment_count); if (!vk_multialloc_alloc(&ma, &cmd->vk.pool->alloc, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) { vk_command_buffer_set_error(&cmd->vk, VK_ERROR_OUT_OF_HOST_MEMORY); @@ -5881,14 +5892,34 @@ tu_CmdBeginRenderPass2(VkCommandBuffer commandBuffer, tu_cs_emit_call(&cmd->cs, cmd->device->dbg_renderpass_stomp_cs); } - for (unsigned i = 0; i < pass->attachment_count; i++) { + for (unsigned i = 0; i < pass->user_attachment_count; i++) { cmd->state.attachments[i] = pAttachmentInfo ? tu_image_view_from_handle(pAttachmentInfo->pAttachments[i]) : cmd->state.framebuffer->attachments[i]; } + + for (unsigned i = 0; i < pass->attachment_count - pass->user_attachment_count; i++) { + /* With imageless attachments, the only attachments in the framebuffer + * are MSRTSS attachments. Without imageless attachments, they are after + * the user's attachments. + */ + unsigned fb_idx = i + (pAttachmentInfo ? 0 : pass->user_attachment_count); + cmd->state.attachments[i + pass->user_attachment_count] = + cmd->state.framebuffer->attachments[fb_idx]; + } + if (pass->attachment_count) { - for (unsigned i = 0; i < pRenderPassBegin->clearValueCount; i++) - cmd->state.clear_values[i] = pRenderPassBegin->pClearValues[i]; + for (unsigned i = 0; i < MIN2(pRenderPassBegin->clearValueCount, + pass->user_attachment_count); i++) { + struct tu_render_pass_attachment *att = &pass->attachments[i]; + uint32_t idx = i; + /* Clear values have to be remapped for MSRTSS, because they may be + * moved to the multisample attachment. + */ + if (att->remapped_clear_att != VK_ATTACHMENT_UNUSED) + idx = att->remapped_clear_att; + cmd->state.clear_values[idx] = pRenderPassBegin->pClearValues[i]; + } } tu_choose_gmem_layout(cmd); @@ -5939,19 +5970,30 @@ tu_CmdBeginRendering(VkCommandBuffer commandBuffer, cmd->state.clear_values = cmd->dynamic_clear_values; for (unsigned i = 0; i < pRenderingInfo->colorAttachmentCount; i++) { - uint32_t a = cmd->dynamic_subpass.color_attachments[i].attachment; if (!pRenderingInfo->pColorAttachments[i].imageView) continue; + uint32_t a = cmd->dynamic_subpass.color_attachments[i].attachment; cmd->state.clear_values[a] = pRenderingInfo->pColorAttachments[i].clearValue; + /* With MSRTSS, the user's attachment corresponds to the + * resolve/unresolve attachment, not the color attachment. The color + * attachment is the transient multisample attachment. However the clear + * happens on the multisample attachment, so we don't remap the + * clear_values assignment above. + */ + bool msrtss = false; + if (a >= cmd->dynamic_pass.user_attachment_count) { + a = cmd->dynamic_pass.attachments[a].user_att; + msrtss = true; + } VK_FROM_HANDLE(tu_image_view, view, pRenderingInfo->pColorAttachments[i].imageView); cmd->state.attachments[a] = view; a = cmd->dynamic_subpass.resolve_attachments[i].attachment; - if (a != VK_ATTACHMENT_UNUSED) { + if (!msrtss && a != VK_ATTACHMENT_UNUSED) { VK_FROM_HANDLE(tu_image_view, resolve_view, pRenderingInfo->pColorAttachments[i].resolveImageView); cmd->state.attachments[a] = resolve_view; @@ -5967,7 +6009,6 @@ tu_CmdBeginRendering(VkCommandBuffer commandBuffer, pRenderingInfo->pStencilAttachment; if (common_info && common_info->imageView != VK_NULL_HANDLE) { VK_FROM_HANDLE(tu_image_view, view, common_info->imageView); - cmd->state.attachments[a] = view; if (pRenderingInfo->pDepthAttachment) { cmd->state.clear_values[a].depthStencil.depth = pRenderingInfo->pDepthAttachment->clearValue.depthStencil.depth; @@ -5978,7 +6019,15 @@ tu_CmdBeginRendering(VkCommandBuffer commandBuffer, pRenderingInfo->pStencilAttachment->clearValue.depthStencil.stencil; } - if (cmd->dynamic_subpass.resolve_count > + bool msrtss = false; + if (a >= cmd->dynamic_pass.user_attachment_count) { + a = cmd->dynamic_pass.attachments[a].user_att; + msrtss = true; + } + + cmd->state.attachments[a] = view; + + if (!msrtss && cmd->dynamic_subpass.resolve_count > cmd->dynamic_subpass.color_count) { VK_FROM_HANDLE(tu_image_view, resolve_view, common_info->resolveImageView); @@ -6014,6 +6063,12 @@ tu_CmdBeginRendering(VkCommandBuffer commandBuffer, cmd->state.attachments[a] = view; } + VkResult result = tu_setup_dynamic_msrtss(cmd); + if (result != VK_SUCCESS) { + vk_command_buffer_set_error(&cmd->vk, result); + return; + } + tu_choose_gmem_layout(cmd); cmd->state.renderpass_cache.pending_flush_bits = diff --git a/src/freedreno/vulkan/tu_cmd_buffer.h b/src/freedreno/vulkan/tu_cmd_buffer.h index eec815c8f7b..30304e02898 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.h +++ b/src/freedreno/vulkan/tu_cmd_buffer.h @@ -18,6 +18,7 @@ #include "tu_lrz.h" #include "tu_pass.h" #include "tu_pipeline.h" +#include "tu_image.h" enum tu_draw_state_group_id { @@ -633,12 +634,15 @@ struct tu_cmd_buffer struct tu_descriptor_state descriptors[MAX_BIND_POINTS]; - struct tu_render_pass_attachment dynamic_rp_attachments[2 * (MAX_RTS + 1) + 2]; + struct tu_render_pass_attachment dynamic_rp_attachments[3 * (MAX_RTS + 1) + 2]; struct tu_subpass_attachment dynamic_color_attachments[MAX_RTS]; struct tu_subpass_attachment dynamic_input_attachments[MAX_RTS + 1]; struct tu_subpass_attachment dynamic_resolve_attachments[MAX_RTS + 1]; - const struct tu_image_view *dynamic_attachments[2 * (MAX_RTS + 1) + 2]; - VkClearValue dynamic_clear_values[2 * (MAX_RTS + 1)]; + struct tu_subpass_attachment dynamic_unresolve_attachments[MAX_RTS + 1]; + const struct tu_image_view *dynamic_attachments[3 * (MAX_RTS + 1) + 2]; + VkClearValue dynamic_clear_values[3 * (MAX_RTS + 1)]; + struct tu_image_view dynamic_msrtss_iviews[MAX_RTS + 1]; + struct tu_image dynamic_msrtss_images[MAX_RTS + 1]; struct tu_render_pass dynamic_pass; struct tu_subpass dynamic_subpass; @@ -674,6 +678,12 @@ struct tu_cmd_buffer void *patchpoints_ctx; } pre_chain; + /* The current MSRTSS temporary buffer. */ + struct tu_bo *msrtt_temporary; + + struct util_dynarray msrtss_color_temporaries; + struct util_dynarray msrtss_depth_temporaries; + uint32_t vsc_draw_strm_pitch; uint32_t vsc_prim_strm_pitch; uint64_t vsc_draw_strm_va, vsc_draw_strm_size_va, vsc_prim_strm_va; diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index ab63d2207a2..347f630509f 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -304,6 +304,7 @@ get_device_extensions(const struct tu_physical_device *device, .EXT_map_memory_placed = true, .EXT_memory_budget = true, .EXT_multi_draw = true, + .EXT_multisampled_render_to_single_sampled = true, .EXT_mutable_descriptor_type = true, .EXT_nested_command_buffer = true, .EXT_non_seamless_cube_map = true, @@ -814,6 +815,9 @@ tu_get_features(struct tu_physical_device *pdevice, /* VK_EXT_dynamic_rendering_unused_attachments */ features->dynamicRenderingUnusedAttachments = true; + + /* VK_EXT_multisampled_render_to_single_sampled */ + features->multisampledRenderToSingleSampled = true; } static void @@ -3167,6 +3171,11 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator) vk_free(&device->vk.alloc, device->queues[i]); } + if (device->msrtss_color_temporary) + tu_destroy_memory(device, device->msrtss_color_temporary); + if (device->msrtss_depth_temporary) + tu_destroy_memory(device, device->msrtss_depth_temporary); + tu_drm_device_finish(device); if (device->physical_device->has_set_iova) @@ -3284,6 +3293,112 @@ tu_add_to_heap(struct tu_device *dev, struct tu_bo *bo) return VK_SUCCESS; } +static VkResult +_tu_init_memory(struct tu_device *device, + struct tu_device_memory *mem, + VkMemoryPropertyFlags mem_property, + enum tu_bo_alloc_flags alloc_flags, + VkDeviceSize size, + VkDeviceAddress client_address, + const char *name) +{ + VkResult result; + + if (mem_property & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) { + mem->lazy = true; + mtx_init(&mem->lazy_mutex, mtx_plain); + enum tu_sparse_vma_flags sparse_flags = + (alloc_flags & TU_BO_ALLOC_REPLAYABLE) ? + TU_SPARSE_VMA_REPLAYABLE : TU_SPARSE_VMA_NONE; + result = tu_sparse_vma_init(device, &mem->vk.base, + &mem->lazy_vma, &mem->iova, + sparse_flags, + size, + client_address); + } else { + result = tu_bo_init_new_explicit_iova( + device, &mem->vk.base, &mem->bo, size, + client_address, mem_property, alloc_flags, NULL, name); + } + + return result; +} + +static VkResult +tu_create_memory(struct tu_device *device, + struct tu_device_memory **mem_out, + VkMemoryPropertyFlags mem_property, + enum tu_bo_alloc_flags alloc_flags, + VkDeviceSize size, + const char *name) +{ + struct tu_device_memory *mem = + (struct tu_device_memory *) vk_object_zalloc( + &device->vk, NULL, sizeof(*mem), VK_OBJECT_TYPE_DEVICE_MEMORY); + + if (!mem) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + mem->vk.size = size; + mem->size = size; + mem->refcnt = 1; + + VkResult result = _tu_init_memory(device, mem, mem_property, alloc_flags, + size, 0, name); + + if (result != VK_SUCCESS) { + vk_object_free(&device->vk, NULL, mem); + return result; + } + + if (!mem->lazy) { + result = tu_add_to_heap(device, mem->bo); + mem->iova = mem->bo->iova; + + if (result != VK_SUCCESS) { + vk_object_free(&device->vk, NULL, mem); + return result; + } + } + + *mem_out = mem; + return VK_SUCCESS; +} + +static struct tu_device_memory * +tu_memory_get_ref(struct tu_device_memory *mem) +{ + p_atomic_inc(&mem->refcnt); + return mem; +} + +static void +_tu_destroy_memory(struct tu_device *device, + struct tu_device_memory *mem) +{ + if (mem->bo) { + p_atomic_add(&device->physical_device->heap.used, -mem->bo->size); + tu_bo_finish(device, mem->bo); + } + + if (mem->lazy) { + tu_sparse_vma_finish(device, &mem->lazy_vma); + mtx_destroy(&mem->lazy_mutex); + } +} + +void +tu_destroy_memory(struct tu_device *device, + struct tu_device_memory *mem) +{ + if (!p_atomic_dec_zero(&mem->refcnt)) + return; + + _tu_destroy_memory(device, mem); + + vk_object_free(&device->vk, NULL, mem); +} + VKAPI_ATTR VkResult VKAPI_CALL tu_AllocateMemory(VkDevice _device, const VkMemoryAllocateInfo *pAllocateInfo, @@ -3314,6 +3429,7 @@ tu_AllocateMemory(VkDevice _device, } mem->size = pAllocateInfo->allocationSize; + mem->refcnt = 1; const VkImportMemoryFdInfoKHR *fd_info = vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR); @@ -3383,22 +3499,9 @@ tu_AllocateMemory(VkDevice _device, VkMemoryPropertyFlags mem_property = device->physical_device->memory.types[pAllocateInfo->memoryTypeIndex]; - if (mem_property & VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT) { - mem->lazy = true; - mtx_init(&mem->lazy_mutex, mtx_plain); - enum tu_sparse_vma_flags sparse_flags = - (alloc_flags & TU_BO_ALLOC_REPLAYABLE) ? - TU_SPARSE_VMA_REPLAYABLE : TU_SPARSE_VMA_NONE; - result = tu_sparse_vma_init(device, &mem->vk.base, - &mem->lazy_vma, &mem->iova, - sparse_flags, - pAllocateInfo->allocationSize, - client_address); - } else { - result = tu_bo_init_new_explicit_iova( - device, &mem->vk.base, &mem->bo, pAllocateInfo->allocationSize, - client_address, mem_property, alloc_flags, NULL, name); - } + result = _tu_init_memory(device, mem, mem_property, alloc_flags, + pAllocateInfo->allocationSize, client_address, + name); } if (result == VK_SUCCESS && !mem->lazy) { @@ -3486,15 +3589,7 @@ tu_FreeMemory(VkDevice _device, TU_RMV(resource_destroy, device, mem); - if (mem->bo) { - p_atomic_add(&device->physical_device->heap.used, -mem->bo->size); - tu_bo_finish(device, mem->bo); - } - - if (mem->lazy) { - tu_sparse_vma_finish(device, &mem->lazy_vma); - mtx_destroy(&mem->lazy_mutex); - } + _tu_destroy_memory(device, mem); vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk); } @@ -3587,6 +3682,183 @@ tu_GetDeviceMemoryCommitment(VkDevice device, *pCommittedMemoryInBytes = memory->lazy_initialized ? memory->size : 0; } +static VkResult +tu_get_msrtss_temporary(struct tu_device *dev, + struct tu_device_memory **mem_out, + uint64_t size, bool depth) +{ + struct tu_device_memory **msrtss_temporary = + depth ? &dev->msrtss_depth_temporary : &dev->msrtss_color_temporary; + + mtx_lock(&dev->mutex); + if ((*msrtss_temporary) && + (*msrtss_temporary)->size >= size) { + struct tu_device_memory *mem = tu_memory_get_ref(*msrtss_temporary); + mtx_unlock(&dev->mutex); + *mem_out = mem; + return VK_SUCCESS; + } + + if (*msrtss_temporary) + tu_destroy_memory(dev, *msrtss_temporary); + + struct tu_device_memory *mem; + VkResult result = + tu_create_memory(dev, &mem, + depth ? (VkMemoryPropertyFlags)0 : + VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT, + TU_BO_ALLOC_INTERNAL_RESOURCE, + size, depth ? "MSRTSS depth" : "MSRTSS color"); + if (result != VK_SUCCESS) { + mtx_unlock(&dev->mutex); + return result; + } + + *msrtss_temporary = tu_memory_get_ref(mem); + mtx_unlock(&dev->mutex); + + *mem_out = mem; + return VK_SUCCESS; +} + +/* Allocate lazy memory and setup images for transient attachments that are + * implicitly created by MSRTSS. The lifetime of these are tied to the + * framebuffer with render passes or the command buffer with dynamic + * rendering. + */ +VkResult +tu_init_msrtss_attachments(struct tu_device *device, + const struct tu_render_pass *pass, + const struct tu_framebuffer *fb, + const struct VkFramebufferAttachmentImageInfo *attachment_info, + const struct tu_image_view **attachments, + struct tu_image *images, + struct tu_image_view *iviews, + struct tu_device_memory **depth_mem_out, + struct tu_device_memory **color_mem_out) +{ + uint64_t depth_size = 0, color_size = 0; + + /* First, create images and calculate size requirement. */ + for (unsigned i = 0; i < pass->attachment_count - pass->user_attachment_count; i++) { + const struct tu_render_pass_attachment *att = + &pass->attachments[pass->user_attachment_count + i]; + uint32_t user_att_idx = att->user_att; + VkImageCreateFlags flags = + attachments ? attachments[user_att_idx]->image->vk.create_flags : + attachment_info[user_att_idx].flags; + bool is_ds = vk_format_is_depth_or_stencil(att->format); + VkImageCreateInfo image_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .flags = flags & VK_IMAGE_CREATE_FRAGMENT_DENSITY_MAP_OFFSET_BIT_EXT, + .imageType = VK_IMAGE_TYPE_2D, + .format = att->format, + .extent = { + fb->width, fb->height, 1 + }, + .mipLevels = 1, + .arrayLayers = MAX2(fb->layers, pass->num_views), + .samples = att->samples, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = is_ds ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : + (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT), + }; + + vk_image_init(&device->vk, &images[i].vk, &image_info); + tu_image_init(device, &images[i], &image_info); + TU_CALLX(device, tu_image_update_layout)(device, &images[i], DRM_FORMAT_MOD_INVALID, NULL); + + if (is_ds) { + depth_size = align64(depth_size, images[i].layout[0].base_align); + depth_size += images[i].total_size; + } else { + color_size = align64(color_size, images[i].layout[0].base_align); + color_size += images[i].total_size; + } + } + + /* Allocate memory. + * + * TODO: Once we support partially committing memory, we won't need to make + * separate allocations for depth and color. For now this at least avoids + * allocating memory for color attachments. + */ + VkResult result = VK_SUCCESS; + + struct tu_device_memory *depth_mem = NULL, *color_mem = NULL; + if (depth_size != 0) { + result = + tu_get_msrtss_temporary(device, &depth_mem, depth_size, true); + if (result != VK_SUCCESS) + return result; + } + + if (color_size != 0) { + result = + tu_get_msrtss_temporary(device, &color_mem, color_size, false); + if (result != VK_SUCCESS) { + if (depth_size != 0) + tu_destroy_memory(device, depth_mem); + return result; + } + } + + *depth_mem_out = depth_mem; + *color_mem_out = color_mem; + + /* Bind images to memory and create image views. */ + uint64_t depth_offset = 0, color_offset = 0; + for (unsigned i = 0; i < pass->attachment_count - pass->user_attachment_count; i++) { + const struct tu_render_pass_attachment *att = + &pass->attachments[pass->user_attachment_count + i]; + struct tu_image *image = &images[i]; + bool is_ds = vk_format_is_depth_or_stencil(att->format); + + if (is_ds) { + depth_offset = align64(depth_offset, image->layout[0].base_align); + image->mem = depth_mem; + image->mem_offset = depth_offset; + image->iova = depth_mem->iova + depth_offset; + depth_offset += image->total_size; + } else { + color_offset = align64(color_offset, image->layout[0].base_align); + image->mem = color_mem; + image->mem_offset = color_offset; + image->iova = color_mem->iova + color_offset; + color_offset += image->total_size; + } + + VkImageViewCreateInfo iview_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = tu_image_to_handle(&images[i]), + .viewType = + fb->layers > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D, + .format = att->format, + .components = { + VK_COMPONENT_SWIZZLE_R, + VK_COMPONENT_SWIZZLE_G, + VK_COMPONENT_SWIZZLE_B, + VK_COMPONENT_SWIZZLE_A, + }, + .subresourceRange = { + .aspectMask = vk_format_aspects(att->format), + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = fb->layers, + } + }; + + tu_image_view_init(device, &iviews[i], &iview_info); + } + + assert(color_offset == color_size); + assert(depth_offset == depth_size); + + return VK_SUCCESS; +} + VKAPI_ATTR VkResult VKAPI_CALL tu_CreateFramebuffer(VkDevice _device, const VkFramebufferCreateInfo *pCreateInfo, @@ -3606,15 +3878,27 @@ tu_CreateFramebuffer(VkDevice _device, bool imageless = pCreateInfo->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT; - size_t size = sizeof(*framebuffer); - if (!imageless) - size += sizeof(struct tu_image_view *) * pCreateInfo->attachmentCount; - framebuffer = (struct tu_framebuffer *) vk_object_alloc( - &device->vk, pAllocator, size, VK_OBJECT_TYPE_FRAMEBUFFER); - if (framebuffer == NULL) + uint32_t msrtss_attachment_count = pass->attachment_count - + pass->user_attachment_count; + struct tu_image_view **attachments; + struct tu_image *images; + struct tu_image_view *iviews; + VK_MULTIALLOC(ma); + vk_multialloc_add(&ma, &framebuffer, struct tu_framebuffer, 1); + vk_multialloc_add(&ma, &attachments, struct tu_image_view *, + (imageless ? 0 : pCreateInfo->attachmentCount) + + msrtss_attachment_count); + if (msrtss_attachment_count) { + vk_multialloc_add(&ma, &images, struct tu_image, + msrtss_attachment_count); + vk_multialloc_add(&ma, &iviews, struct tu_image_view, + msrtss_attachment_count); + } + if (!vk_object_multizalloc( + &device->vk, &ma, pAllocator, VK_OBJECT_TYPE_FRAMEBUFFER)) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - framebuffer->attachment_count = pCreateInfo->attachmentCount; + framebuffer->attachment_count = pass->attachment_count; framebuffer->width = pCreateInfo->width; framebuffer->height = pCreateInfo->height; framebuffer->layers = pCreateInfo->layers; @@ -3629,6 +3913,35 @@ tu_CreateFramebuffer(VkDevice _device, tu_framebuffer_tiling_config(framebuffer, device, pass); + /* For MSRTSS, allocate extra images that are tied to the VkFramebuffer */ + if (msrtss_attachment_count > 0) { + const VkFramebufferAttachmentsCreateInfo *fb_att_info = + vk_find_struct_const(pCreateInfo->pNext, + FRAMEBUFFER_ATTACHMENTS_CREATE_INFO); + VkResult result = + tu_init_msrtss_attachments(device, + pass, framebuffer, + imageless ? fb_att_info->pAttachmentImageInfos : NULL, + imageless ? NULL : + framebuffer->attachments, + images, iviews, + &framebuffer->depth_mem, + &framebuffer->color_mem); + if (result != VK_SUCCESS) { + vk_object_free(&device->vk, pAllocator, framebuffer); + return vk_error(device, result); + } + + /* With imageless attachments, the only attachments in the framebuffer + * are MSRTSS attachments. Without imageless attachments, they are after + * the user's attachments. + */ + for (uint32_t i = 0; i < msrtss_attachment_count; i++) { + uint32_t fb_idx = i + (imageless ? 0 : pCreateInfo->attachmentCount); + framebuffer->attachments[fb_idx] = &iviews[i]; + } + } + *pFramebuffer = tu_framebuffer_to_handle(framebuffer); return VK_SUCCESS; } @@ -3651,6 +3964,46 @@ tu_setup_dynamic_framebuffer(struct tu_cmd_buffer *cmd_buffer, tu_framebuffer_tiling_config(framebuffer, cmd_buffer->device, pass); } +VkResult +tu_setup_dynamic_msrtss(struct tu_cmd_buffer *cmd_buffer) +{ + struct tu_render_pass *pass = &cmd_buffer->dynamic_pass; + struct tu_framebuffer *framebuffer = &cmd_buffer->dynamic_framebuffer; + + if (pass->attachment_count > pass->user_attachment_count) { + struct tu_device_memory *depth_mem = NULL, *color_mem = NULL; + + VkResult result = + tu_init_msrtss_attachments(cmd_buffer->device, + pass, framebuffer, NULL, + cmd_buffer->dynamic_attachments, + cmd_buffer->dynamic_msrtss_images, + cmd_buffer->dynamic_msrtss_iviews, + &depth_mem, &color_mem); + + if (result != VK_SUCCESS) { + return vk_error(cmd_buffer, result); + } + + for (unsigned i = 0; i < pass->attachment_count - + pass->user_attachment_count; i++) { + cmd_buffer->dynamic_attachments[i + pass->user_attachment_count] = + &cmd_buffer->dynamic_msrtss_iviews[i]; + } + + if (color_mem) { + util_dynarray_append(&cmd_buffer->msrtss_color_temporaries, + struct tu_device_memory *, color_mem); + } + if (depth_mem) { + util_dynarray_append(&cmd_buffer->msrtss_depth_temporaries, + struct tu_device_memory *, depth_mem); + } + } + + return VK_SUCCESS; +} + VKAPI_ATTR void VKAPI_CALL tu_DestroyFramebuffer(VkDevice _device, VkFramebuffer _fb, @@ -3668,6 +4021,11 @@ tu_DestroyFramebuffer(VkDevice _device, if (!fb) return; + if (fb->depth_mem) + tu_destroy_memory(device, fb->depth_mem); + if (fb->color_mem) + tu_destroy_memory(device, fb->color_mem); + vk_object_free(&device->vk, pAllocator, fb); } diff --git a/src/freedreno/vulkan/tu_device.h b/src/freedreno/vulkan/tu_device.h index 1553f3fc288..82c8c8decaa 100644 --- a/src/freedreno/vulkan/tu_device.h +++ b/src/freedreno/vulkan/tu_device.h @@ -458,6 +458,12 @@ struct tu_device /* Address space and global fault count for this local_fd with DRM backend */ uint64_t fault_count; + /* Temporary storage for multisampled attachments backed by a + * single-sampled image view in sysmem mode. + */ + struct tu_device_memory *msrtss_color_temporary; + struct tu_device_memory *msrtss_depth_temporary; + struct u_trace_context trace_context; struct list_head copy_timestamp_cs_pool; mtx_t copy_timestamp_cs_pool_mutex; @@ -483,6 +489,8 @@ struct tu_device_memory uint64_t iova; uint64_t size; + uint32_t refcnt; + /* For lazy memory */ bool lazy; bool lazy_initialized; @@ -497,6 +505,10 @@ struct tu_device_memory VK_DEFINE_NONDISP_HANDLE_CASTS(tu_device_memory, vk.base, VkDeviceMemory, VK_OBJECT_TYPE_DEVICE_MEMORY) +void +tu_destroy_memory(struct tu_device *device, + struct tu_device_memory *mem); + VkResult tu_allocate_lazy_memory(struct tu_device *dev, struct tu_device_memory *mem); @@ -538,6 +550,8 @@ struct tu_framebuffer uint32_t height; uint32_t layers; + struct tu_device_memory *depth_mem, *color_mem; + struct tu_tiling_config tiling[TU_GMEM_LAYOUT_COUNT]; uint32_t attachment_count; @@ -582,6 +596,9 @@ tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo); void tu_setup_dynamic_framebuffer(struct tu_cmd_buffer *cmd_buffer, const VkRenderingInfo *pRenderingInfo); +VkResult +tu_setup_dynamic_msrtss(struct tu_cmd_buffer *cmd_buffer); + void tu_copy_buffer(struct u_trace_context *utctx, void *cmdstream, void *ts_from, uint64_t from_offset_B, @@ -669,5 +686,4 @@ tu_bo_init_new_cached(struct tu_device *dev, struct vk_object_base *base, flags, NULL, name); } - #endif /* TU_DEVICE_H */ diff --git a/src/freedreno/vulkan/tu_formats.cc b/src/freedreno/vulkan/tu_formats.cc index a1768017157..b9ba2c985b3 100644 --- a/src/freedreno/vulkan/tu_formats.cc +++ b/src/freedreno/vulkan/tu_formats.cc @@ -127,7 +127,8 @@ static void tu_physical_device_get_format_properties( struct tu_physical_device *physical_device, VkFormat vk_format, - VkFormatProperties3 *out_properties) + VkFormatProperties3 *out_properties, + VkSubpassResolvePerformanceQueryEXT *msrtss_out) { VkFormatFeatureFlags2 linear = 0, optimal = 0, buffer = 0; enum pipe_format format = vk_format_to_pipe_format(vk_format); @@ -145,6 +146,10 @@ tu_physical_device_get_format_properties( goto end; } + /* We never have to spill to memory for MSRTSS. */ + if (msrtss_out) + msrtss_out->optimal = true; + /* We don't support BufferToImage/ImageToBuffer for npot formats */ if (!is_npot) buffer |= VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT; @@ -312,9 +317,12 @@ tu_GetPhysicalDeviceFormatProperties2( vk_find_struct(pFormatProperties->pNext, FORMAT_PROPERTIES_3); if (!props3) props3 = &local_props3; + VkSubpassResolvePerformanceQueryEXT *msrtss_out = + vk_find_struct(pFormatProperties->pNext, + SUBPASS_RESOLVE_PERFORMANCE_QUERY_EXT); tu_physical_device_get_format_properties( - physical_device, format, props3); + physical_device, format, props3, msrtss_out); pFormatProperties->formatProperties = (VkFormatProperties) { .linearTilingFeatures = @@ -386,7 +394,7 @@ tu_get_image_format_properties( BITMASK_ENUM(VkSampleCountFlagBits) sampleCounts = VK_SAMPLE_COUNT_1_BIT; tu_physical_device_get_format_properties(physical_device, info->format, - &format_props); + &format_props, NULL); switch (info->tiling) { case VK_IMAGE_TILING_LINEAR: diff --git a/src/freedreno/vulkan/tu_image.cc b/src/freedreno/vulkan/tu_image.cc index df9c436ea71..db1c0ea2462 100644 --- a/src/freedreno/vulkan/tu_image.cc +++ b/src/freedreno/vulkan/tu_image.cc @@ -185,7 +185,7 @@ tu_cs_image_flag_ref(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t l tu_cs_emit(cs, iview->FLAG_BUFFER_PITCH); } -static void +void tu_image_view_init(struct tu_device *device, struct tu_image_view *iview, const VkImageViewCreateInfo *pCreateInfo) @@ -646,7 +646,7 @@ format_list_ubwc_possible(struct tu_device *dev, return true; } -static VkResult +VkResult tu_image_init(struct tu_device *device, struct tu_image *image, const VkImageCreateInfo *pCreateInfo) { diff --git a/src/freedreno/vulkan/tu_image.h b/src/freedreno/vulkan/tu_image.h index e077ce757d7..3fe9a1c00ec 100644 --- a/src/freedreno/vulkan/tu_image.h +++ b/src/freedreno/vulkan/tu_image.h @@ -57,6 +57,15 @@ struct tu_image }; VK_DEFINE_NONDISP_HANDLE_CASTS(tu_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE) +VkResult +tu_image_init(struct tu_device *device, struct tu_image *image, + const VkImageCreateInfo *pCreateInfo); + +template +VkResult +tu_image_update_layout(struct tu_device *device, struct tu_image *image, + uint64_t modifier, const VkSubresourceLayout *plane_layouts); + struct tu_image_view { struct vk_image_view vk; @@ -80,6 +89,11 @@ struct tu_image_view VK_DEFINE_NONDISP_HANDLE_CASTS(tu_image_view, vk.base, VkImageView, VK_OBJECT_TYPE_IMAGE_VIEW); +void +tu_image_view_init(struct tu_device *device, + struct tu_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo); + uint32_t tu6_plane_count(VkFormat format); enum pipe_format tu6_plane_format(VkFormat format, uint32_t plane); diff --git a/src/freedreno/vulkan/tu_lrz.cc b/src/freedreno/vulkan/tu_lrz.cc index 6358e74612f..c175f64fd6a 100644 --- a/src/freedreno/vulkan/tu_lrz.cc +++ b/src/freedreno/vulkan/tu_lrz.cc @@ -371,6 +371,20 @@ tu_lrz_begin_renderpass(struct tu_cmd_buffer *cmd) tu6_write_lrz_cntl(cmd, &cmd->cs, {}); tu6_emit_lrz_buffer(&cmd->cs, NULL); } + + /* Multisample and single-sample LRZ layout are different, so when + * unresolving a depth image we have to disable LRZ for the entirety of the + * render pass. + */ + for (unsigned i = 0; i < cmd->state.pass->subpass_count; i++) { + const struct tu_subpass *subpass = &cmd->state.pass->subpasses[i]; + if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED && + subpass->unresolve_count > subpass->color_count && + subpass->unresolve_attachments[subpass->color_count].attachment != + VK_ATTACHMENT_UNUSED) { + tu_lrz_disable_during_renderpass(cmd, "multisampled_render_to_single_sample with LOAD_OP_LOAD"); + } + } } TU_GENX(tu_lrz_begin_renderpass); diff --git a/src/freedreno/vulkan/tu_pass.cc b/src/freedreno/vulkan/tu_pass.cc index 5a350bac73e..730393ee4db 100644 --- a/src/freedreno/vulkan/tu_pass.cc +++ b/src/freedreno/vulkan/tu_pass.cc @@ -528,7 +528,7 @@ tu_render_pass_disable_fdm(struct tu_device *dev, struct tu_render_pass *pass) if (TU_DEBUG(NOFDM)) return true; - for (uint32_t i = 0; i < pass->attachment_count; i++) { + for (uint32_t i = 0; i < pass->user_attachment_count; i++) { struct tu_render_pass_attachment *att = &pass->attachments[i]; if (att->samples > 1 && @@ -593,6 +593,83 @@ tu_render_pass_cond_config(struct tu_device *device, } } +/** + * Optimizes MSRTSS resolves/unresolves. + * + * We always emit a resolve/unresolve around each attachment used as MSRTSS + * for each subpass. But if the attachment isn't loaded or isn't stored and + * it's the first or last use respectively, then we can optimize it away. We + * can also transfer clears from the original attachment to the multisample + * attachment. We have to do this optimization after use tracking information + * is populated. + */ +static void +tu_render_pass_opt_resolve_unresolve(struct tu_render_pass *pass) +{ + for (int i = 0; i < pass->subpass_count; i++) { + struct tu_subpass *subpass = &pass->subpasses[i]; + for (int j = 0; j < subpass->unresolve_count; j++) { + if (subpass->unresolve_attachments[j].attachment != + VK_ATTACHMENT_UNUSED) { + struct tu_render_pass_attachment *src_att = + &pass->attachments[subpass->unresolve_attachments[j].attachment]; + uint32_t dst_att_idx = + j >= subpass->color_count ? + subpass->depth_stencil_attachment.attachment : + subpass->color_attachments[j].attachment; + struct tu_render_pass_attachment *dst_att = + &pass->attachments[dst_att_idx]; + if (i == src_att->first_subpass_idx && + !src_att->load && !src_att->load_stencil) { + /* Transfer the clear to the (in-tile-memory) multisample + * attachment. + */ + if (src_att->clear_mask) { + dst_att->clear_mask = src_att->clear_mask; + dst_att->clear_views = src_att->clear_views; + src_att->remapped_clear_att = dst_att_idx; + src_att->clear_mask = 0; + src_att->clear_views = 0; + } + + /* Delete the unresolve. */ + subpass->unresolve_attachments[j].attachment = + VK_ATTACHMENT_UNUSED; + } + } + } + + for (int j = 0; j < subpass->resolve_count; j++) { + if (subpass->resolve_attachments[j].attachment != + VK_ATTACHMENT_UNUSED) { + struct tu_render_pass_attachment *att = + &pass->attachments[subpass->resolve_attachments[j].attachment]; + if ( + /* Check that the resolve source is a fake MSRTSS attachment. + */ + (j == subpass->color_count ? + subpass->depth_stencil_attachment.attachment : + subpass->color_attachments[i].attachment) >= + pass->user_attachment_count && + /* Check that it's the last use and the original attachment is + * not stored. + */ + i == att->last_subpass_idx && + !att->store && !att->store_stencil) { + /* Delete the resolve. */ + subpass->resolve_attachments[j].attachment = + VK_ATTACHMENT_UNUSED; + att->will_be_resolved = false; + if (j == subpass->color_count) { + subpass->resolve_depth_stencil = false; + subpass->resolve_count = subpass->color_count; + } + } + } + } + } +} + /** * Checks if the pass should allow IB2 skipping. * @@ -825,6 +902,7 @@ attachment_set_ops(struct tu_device *device, } /* load/store ops */ + att->remapped_clear_att = VK_ATTACHMENT_UNUSED; att->clear_mask = (load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) ? VK_IMAGE_ASPECT_COLOR_BIT : 0; att->load = (load_op == VK_ATTACHMENT_LOAD_OP_LOAD); @@ -892,7 +970,7 @@ tu_subpass_use_attachment(struct tu_render_pass *pass, int i, uint32_t a, const struct tu_render_pass_attachment *att = &pass->attachments[a]; att->gmem = true; - update_samples(subpass, pCreateInfo->pAttachments[a].samples); + update_samples(subpass, att->samples); att->clear_views |= subpass->multiview_mask; /* Loads and clears are emitted at the start of the subpass that needs them. */ @@ -917,6 +995,45 @@ tu_subpass_resolve_attachment(struct tu_render_pass *pass, int i, uint32_t dst_a } } +static void +tu_init_renderpass_attachment(struct tu_device *device, + const VkAttachmentDescription2 *pAttachment, + struct tu_render_pass_attachment *att, + VkSampleCountFlagBits samples) +{ + att->format = pAttachment->format; + att->samples = samples; + /* for d32s8, cpp is for the depth image, and + * att->samples will be used as the cpp for the stencil image + */ + if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) + att->cpp = 4 * samples; + else + att->cpp = vk_format_get_blocksize(att->format) * samples; + /* Initially not allocated into gmem, tu_subpass_use_attachment() will move it there. */ + att->gmem = false; + + att->first_subpass_idx = VK_SUBPASS_EXTERNAL; + att->last_subpass_idx = 0; +} + +static void +tu_init_msrtss_renderpass_attachment(struct tu_device *device, + const VkRenderPassCreateInfo2 *pInfo, + uint32_t user_att_idx, + struct tu_render_pass_attachment *att, + VkSampleCountFlagBits samples) +{ + tu_init_renderpass_attachment(device, &pInfo->pAttachments[user_att_idx], att, samples); + att->user_att = user_att_idx; + + attachment_set_ops(device, att, + VK_ATTACHMENT_LOAD_OP_DONT_CARE, + VK_ATTACHMENT_LOAD_OP_DONT_CARE, + VK_ATTACHMENT_STORE_OP_DONT_CARE, + VK_ATTACHMENT_STORE_OP_DONT_CARE); +} + VKAPI_ATTR VkResult VKAPI_CALL tu_CreateRenderPass2(VkDevice _device, const VkRenderPassCreateInfo2 *pCreateInfo, @@ -935,17 +1052,46 @@ tu_CreateRenderPass2(VkDevice _device, assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2); + uint32_t attachment_count = pCreateInfo->attachmentCount; + + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + const VkSubpassDescription2 *subpass = &pCreateInfo->pSubpasses[i]; + const VkMultisampledRenderToSingleSampledInfoEXT *msrtss = + vk_find_struct_const(subpass->pNext, + MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT); + if (msrtss && msrtss->multisampledRenderToSingleSampledEnable) { + for (uint32_t i = 0; i < subpass->colorAttachmentCount; i++) { + if (subpass->pColorAttachments[i].attachment == VK_ATTACHMENT_UNUSED) + continue; + const VkAttachmentDescription2 *att = + &pCreateInfo->pAttachments[subpass->pColorAttachments[i].attachment]; + if (att->samples != msrtss->rasterizationSamples) { + attachment_count++; + } + } + if (subpass->pDepthStencilAttachment && + subpass->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) { + const VkAttachmentDescription2 *att = + &pCreateInfo->pAttachments[subpass->pDepthStencilAttachment->attachment]; + if (att->samples != msrtss->rasterizationSamples) { + attachment_count++; + } + } + } + } + size = sizeof(*pass); size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]); attachments_offset = size; - size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]); + size += attachment_count * sizeof(pass->attachments[0]); pass = (struct tu_render_pass *) vk_object_zalloc( &device->vk, pAllocator, size, VK_OBJECT_TYPE_RENDER_PASS); if (pass == NULL) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - pass->attachment_count = pCreateInfo->attachmentCount; + pass->attachment_count = attachment_count; + pass->user_attachment_count = pCreateInfo->attachmentCount; pass->subpass_count = pCreateInfo->subpassCount; pass->attachments = (struct tu_render_pass_attachment *) ((char *) pass + @@ -954,39 +1100,40 @@ tu_CreateRenderPass2(VkDevice _device, for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { struct tu_render_pass_attachment *att = &pass->attachments[i]; - att->format = pCreateInfo->pAttachments[i].format; - att->samples = pCreateInfo->pAttachments[i].samples; - /* for d32s8, cpp is for the depth image, and - * att->samples will be used as the cpp for the stencil image - */ - if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) - att->cpp = 4 * att->samples; - else - att->cpp = vk_format_get_blocksize(att->format) * att->samples; - /* Initially not allocated into gmem, tu_subpass_use_attachment() will move it there. */ - att->gmem = false; - - VkAttachmentLoadOp loadOp = pCreateInfo->pAttachments[i].loadOp; - VkAttachmentLoadOp stencilLoadOp = pCreateInfo->pAttachments[i].stencilLoadOp; - - attachment_set_ops(device, att, loadOp, stencilLoadOp, + tu_init_renderpass_attachment(device, &pCreateInfo->pAttachments[i], + att, + pCreateInfo->pAttachments[i].samples); + attachment_set_ops(device, att, + pCreateInfo->pAttachments[i].loadOp, + pCreateInfo->pAttachments[i].stencilLoadOp, pCreateInfo->pAttachments[i].storeOp, pCreateInfo->pAttachments[i].stencilStoreOp); - - att->first_subpass_idx = VK_SUBPASS_EXTERNAL; - att->last_subpass_idx = 0; } + uint32_t subpass_attachment_count = 0; struct tu_subpass_attachment *p; for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i]; const VkSubpassDescriptionDepthStencilResolve *ds_resolve = vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE); + const VkMultisampledRenderToSingleSampledInfoEXT *msrtss = + vk_find_struct_const(desc->pNext, + MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT); + bool msrtss_enabled = msrtss && + msrtss->multisampledRenderToSingleSampledEnable; + pass->subpasses[i].resolve_count = + ((desc->pResolveAttachments || msrtss_enabled) ? desc->colorAttachmentCount : 0) + + ((is_depth_stencil_resolve_enabled(ds_resolve) || msrtss_enabled) ? 1 : 0); + pass->subpasses[i].unresolve_count = + msrtss_enabled ? + (desc->colorAttachmentCount + + (desc->pDepthStencilAttachment && + desc->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED ? 1 : 0)) + : 0; subpass_attachment_count += desc->inputAttachmentCount + desc->colorAttachmentCount + - (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) + - (is_depth_stencil_resolve_enabled(ds_resolve) ? 1 : 0); + pass->subpasses[i].resolve_count + pass->subpasses[i].unresolve_count; } if (subpass_attachment_count) { @@ -1018,15 +1165,18 @@ tu_CreateRenderPass2(VkDevice _device, pass->has_fdm = true; p = pass->subpass_attachments; + uint32_t msrtss_att_idx = pCreateInfo->attachmentCount; for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i]; const VkSubpassDescriptionDepthStencilResolve *ds_resolve = vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE); + const VkMultisampledRenderToSingleSampledInfoEXT *msrtss = + vk_find_struct_const(desc->pNext, + MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT); struct tu_subpass *subpass = &pass->subpasses[i]; subpass->input_count = desc->inputAttachmentCount; subpass->color_count = desc->colorAttachmentCount; - subpass->resolve_count = 0; subpass->resolve_depth_stencil = is_depth_stencil_resolve_enabled(ds_resolve); subpass->samples = (VkSampleCountFlagBits) 0; subpass->srgb_cntl = 0; @@ -1064,12 +1214,41 @@ tu_CreateRenderPass2(VkDevice _device, if (desc->colorAttachmentCount > 0) { subpass->color_attachments = p; p += desc->colorAttachmentCount; + } + if (subpass->resolve_count) { + subpass->resolve_attachments = p; + p += subpass->resolve_count; + for (uint32_t i = 0; i < subpass->resolve_count; i++) + subpass->resolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED; + } + + if (subpass->unresolve_count) { + subpass->unresolve_attachments = p; + p += subpass->unresolve_count; + for (uint32_t i = 0; i < subpass->unresolve_count; i++) + subpass->unresolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED; + } + + if (desc->colorAttachmentCount) { for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { uint32_t a = desc->pColorAttachments[j].attachment; subpass->color_attachments[j].attachment = a; if (a != VK_ATTACHMENT_UNUSED) { + if (msrtss && msrtss->multisampledRenderToSingleSampledEnable && + msrtss->rasterizationSamples != + pCreateInfo->pAttachments[a].samples) { + tu_init_msrtss_renderpass_attachment( + device, pCreateInfo, a, + &pass->attachments[msrtss_att_idx], + msrtss->rasterizationSamples); + tu_subpass_resolve_attachment(pass, i, a, msrtss_att_idx); + subpass->resolve_attachments[j].attachment = a; + subpass->unresolve_attachments[j].attachment = a; + subpass->color_attachments[j].attachment = a = msrtss_att_idx++; + } + tu_subpass_use_attachment(pass, i, a, pCreateInfo); if (vk_format_is_srgb(pass->attachments[a].format)) @@ -1078,11 +1257,12 @@ tu_CreateRenderPass2(VkDevice _device, } } - subpass->resolve_attachments = (desc->pResolveAttachments || subpass->resolve_depth_stencil) ? p : NULL; if (desc->pResolveAttachments) { - p += desc->colorAttachmentCount; - subpass->resolve_count += desc->colorAttachmentCount; for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { + /* skip if MSRTSS has already created a resolve attachment */ + if (subpass->resolve_attachments[j].attachment != VK_ATTACHMENT_UNUSED) + continue; + uint32_t a = desc->pResolveAttachments[j].attachment; uint32_t src_a = desc->pColorAttachments[j].attachment; subpass->resolve_attachments[j].attachment = a; @@ -1092,8 +1272,6 @@ tu_CreateRenderPass2(VkDevice _device, } if (subpass->resolve_depth_stencil) { - p++; - subpass->resolve_count++; uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment; uint32_t src_a = desc->pDepthStencilAttachment->attachment; subpass->resolve_attachments[subpass->resolve_count - 1].attachment = a; @@ -1107,6 +1285,20 @@ tu_CreateRenderPass2(VkDevice _device, subpass->depth_used = a != VK_ATTACHMENT_UNUSED; subpass->stencil_used = a != VK_ATTACHMENT_UNUSED; if (a != VK_ATTACHMENT_UNUSED) { + if (msrtss && msrtss->multisampledRenderToSingleSampledEnable && + msrtss->rasterizationSamples != + pCreateInfo->pAttachments[a].samples) { + tu_init_msrtss_renderpass_attachment( + device, pCreateInfo, a, + &pass->attachments[msrtss_att_idx], + msrtss->rasterizationSamples); + tu_subpass_resolve_attachment(pass, i, a, msrtss_att_idx); + subpass->resolve_attachments[subpass->resolve_count - 1].attachment = a; + subpass->unresolve_attachments[subpass->resolve_count - 1].attachment = a; + subpass->depth_stencil_attachment.attachment = a = msrtss_att_idx++; + subpass->resolve_depth_stencil = true; + } + tu_subpass_use_attachment(pass, i, a, pCreateInfo); } @@ -1129,6 +1321,8 @@ tu_CreateRenderPass2(VkDevice _device, tu_render_pass_check_feedback_loop(pass); + tu_render_pass_opt_resolve_unresolve(pass); + /* disable unused attachments */ for (uint32_t i = 0; i < pass->attachment_count; i++) { struct tu_render_pass_attachment *att = &pass->attachments[i]; @@ -1180,11 +1374,12 @@ tu_DestroyRenderPass(VkDevice _device, static void tu_setup_dynamic_attachment(struct tu_render_pass_attachment *att, - struct tu_image_view *view) + struct tu_image_view *view, + VkSampleCountFlagBits samples) { *att = {}; att->format = view->vk.format; - att->samples = (VkSampleCountFlagBits) view->image->layout->nr_samples; + att->samples = samples; /* for d32s8, cpp is for the depth image, and * att->samples will be used as the cpp for the stencil image @@ -1202,6 +1397,9 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer, struct tu_device *device = cmd_buffer->device; struct tu_render_pass *pass = &cmd_buffer->dynamic_pass; struct tu_subpass *subpass = &cmd_buffer->dynamic_subpass; + const VkMultisampledRenderToSingleSampledInfoEXT *msrtss = + vk_find_struct_const(info->pNext, + MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT); *pass = {}; *subpass = {}; @@ -1210,10 +1408,13 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer, pass->attachments = cmd_buffer->dynamic_rp_attachments; subpass->color_count = subpass->resolve_count = info->colorAttachmentCount; + if (msrtss) + subpass->unresolve_count = info->colorAttachmentCount; subpass->input_count = info->colorAttachmentCount + 1; subpass->color_attachments = cmd_buffer->dynamic_color_attachments; subpass->input_attachments = cmd_buffer->dynamic_input_attachments; subpass->resolve_attachments = cmd_buffer->dynamic_resolve_attachments; + subpass->unresolve_attachments = cmd_buffer->dynamic_unresolve_attachments; subpass->multiview_mask = info->viewMask; subpass->legacy_dithering_enabled = info->flags & VK_RENDERING_ENABLE_LEGACY_DITHERING_BIT_EXT; @@ -1239,40 +1440,69 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer, subpass->color_attachments[i].attachment = VK_ATTACHMENT_UNUSED; subpass->input_attachments[i + 1].attachment = VK_ATTACHMENT_UNUSED; subpass->resolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED; + subpass->unresolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED; continue; } VK_FROM_HANDLE(tu_image_view, view, att_info->imageView); - tu_setup_dynamic_attachment(att, view); - att->gmem = true; - att->clear_views = info->viewMask; - attachment_set_ops(device, att, att_info->loadOp, - VK_ATTACHMENT_LOAD_OP_DONT_CARE, att_info->storeOp, - VK_ATTACHMENT_STORE_OP_DONT_CARE); - subpass->color_attachments[i].attachment = a++; - subpass->input_attachments[i + 1].attachment = - subpass->color_attachments[i].attachment; - subpass->input_attachments[i + 1].patch_input_gmem = true; + tu_setup_dynamic_attachment(att, view, + (VkSampleCountFlagBits)view->image->layout->nr_samples); + uint32_t att_idx = a++; + bool att_is_msrtss = false; + if (msrtss && msrtss->multisampledRenderToSingleSampledEnable && + msrtss->rasterizationSamples != + (VkSampleCountFlagBits)view->image->layout->nr_samples) { + if (att_info->loadOp == VK_ATTACHMENT_LOAD_OP_LOAD) + subpass->unresolve_attachments[i].attachment = att_idx; + else + subpass->unresolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED; + if (att_info->storeOp == VK_ATTACHMENT_STORE_OP_STORE) { + subpass->resolve_attachments[i].attachment = att_idx; + att->will_be_resolved = true; + } else { + subpass->resolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED; + att->will_be_resolved = false; + } + attachment_set_ops(device, att, VK_ATTACHMENT_LOAD_OP_DONT_CARE, + VK_ATTACHMENT_LOAD_OP_DONT_CARE, + att_info->storeOp, + VK_ATTACHMENT_STORE_OP_DONT_CARE); + att_is_msrtss = true; + subpass->samples = msrtss->rasterizationSamples; + } else { + att->gmem = true; + att->clear_views = info->viewMask; + attachment_set_ops(device, att, att_info->loadOp, + VK_ATTACHMENT_LOAD_OP_DONT_CARE, att_info->storeOp, + VK_ATTACHMENT_STORE_OP_DONT_CARE); + subpass->input_attachments[i + 1].patch_input_gmem = true; + subpass->unresolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED; + subpass->samples = (VkSampleCountFlagBits) view->image->layout->nr_samples; + } + subpass->color_attachments[i].attachment = att_idx; + subpass->input_attachments[i + 1].attachment = att_idx; - subpass->samples = (VkSampleCountFlagBits) view->image->layout->nr_samples; if (vk_format_is_srgb(view->vk.format)) subpass->srgb_cntl |= 1 << i; - if (att_info->resolveMode != VK_RESOLVE_MODE_NONE) { - struct tu_render_pass_attachment *resolve_att = &pass->attachments[a]; - VK_FROM_HANDLE(tu_image_view, resolve_view, att_info->resolveImageView); - tu_setup_dynamic_attachment(resolve_att, resolve_view); - resolve_att->gmem = false; - attachment_set_ops( - device, resolve_att, VK_ATTACHMENT_LOAD_OP_DONT_CARE, - VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_STORE_OP_STORE, - VK_ATTACHMENT_STORE_OP_DONT_CARE); - subpass->resolve_attachments[i].attachment = a++; - att->will_be_resolved = true; - } else { - subpass->resolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED; - att->will_be_resolved = false; + if (!att_is_msrtss) { + if (att_info->resolveMode != VK_RESOLVE_MODE_NONE) { + struct tu_render_pass_attachment *resolve_att = &pass->attachments[a]; + VK_FROM_HANDLE(tu_image_view, resolve_view, att_info->resolveImageView); + tu_setup_dynamic_attachment(resolve_att, resolve_view, + VK_SAMPLE_COUNT_1_BIT); + resolve_att->gmem = false; + attachment_set_ops( + device, resolve_att, VK_ATTACHMENT_LOAD_OP_DONT_CARE, + VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_STORE_OP_STORE, + VK_ATTACHMENT_STORE_OP_DONT_CARE); + subpass->resolve_attachments[i].attachment = a++; + att->will_be_resolved = true; + } else { + subpass->resolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED; + att->will_be_resolved = false; + } } } @@ -1287,47 +1517,88 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer, VK_FROM_HANDLE(tu_image_view, view, common_info->imageView); struct tu_render_pass_attachment *att = &pass->attachments[a]; - tu_setup_dynamic_attachment(att, view); - att->gmem = true; - att->clear_views = info->viewMask; - subpass->depth_stencil_attachment.attachment = a++; - subpass->input_attachments[0].attachment = - subpass->depth_stencil_attachment.attachment; - subpass->input_attachments[0].patch_input_gmem = true; + tu_setup_dynamic_attachment(att, view, + (VkSampleCountFlagBits)view->image->layout->nr_samples); + uint32_t att_idx = a++; + subpass->input_attachments[0].attachment = att_idx; + subpass->depth_stencil_attachment.attachment = att_idx; subpass->depth_used = (bool) info->pDepthAttachment; subpass->stencil_used = (bool) info->pStencilAttachment; - attachment_set_ops( - device, att, + bool att_is_msrtss = false; + VkAttachmentLoadOp load_op = (info->pDepthAttachment && info->pDepthAttachment->imageView) ? - info->pDepthAttachment->loadOp : VK_ATTACHMENT_LOAD_OP_NONE_EXT, + info->pDepthAttachment->loadOp : VK_ATTACHMENT_LOAD_OP_NONE_EXT; + VkAttachmentLoadOp stencil_load_op = (info->pStencilAttachment && info->pStencilAttachment->imageView) ? - info->pStencilAttachment->loadOp : VK_ATTACHMENT_LOAD_OP_NONE_EXT, + info->pStencilAttachment->loadOp : VK_ATTACHMENT_LOAD_OP_NONE_EXT; + VkAttachmentStoreOp store_op = (info->pDepthAttachment && info->pDepthAttachment->imageView) ? - info->pDepthAttachment->storeOp : VK_ATTACHMENT_STORE_OP_NONE_EXT, + info->pDepthAttachment->storeOp : VK_ATTACHMENT_STORE_OP_NONE_EXT; + VkAttachmentStoreOp stencil_store_op = (info->pStencilAttachment && info->pStencilAttachment->imageView) ? - info->pStencilAttachment->storeOp : VK_ATTACHMENT_STORE_OP_NONE_EXT); + info->pStencilAttachment->storeOp : VK_ATTACHMENT_STORE_OP_NONE_EXT; + if (msrtss && msrtss->multisampledRenderToSingleSampledEnable && + msrtss->rasterizationSamples != + (VkSampleCountFlagBits)view->image->layout->nr_samples) { + bool load = load_op == VK_ATTACHMENT_LOAD_OP_LOAD || + stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD; + bool store = store_op == VK_ATTACHMENT_STORE_OP_STORE || + stencil_store_op == VK_ATTACHMENT_STORE_OP_STORE; + if (att->format == VK_FORMAT_D24_UNORM_S8_UINT && + (store_op == VK_ATTACHMENT_STORE_OP_NONE_EXT || + stencil_store_op == VK_ATTACHMENT_STORE_OP_NONE_EXT) && + store) + load = true; - subpass->samples = (VkSampleCountFlagBits) view->image->layout->nr_samples; - - if (common_info->resolveMode != VK_RESOLVE_MODE_NONE) { - unsigned i = subpass->resolve_count++; - struct tu_render_pass_attachment *resolve_att = &pass->attachments[a]; - VK_FROM_HANDLE(tu_image_view, resolve_view, - common_info->resolveImageView); - tu_setup_dynamic_attachment(resolve_att, resolve_view); - resolve_att->gmem = false; - attachment_set_ops(device, resolve_att, + if (load) { + unsigned i = subpass->unresolve_count++; + subpass->unresolve_attachments[i].attachment = att_idx; + } + if (store) { + unsigned i = subpass->resolve_count++; + subpass->resolve_attachments[i].attachment = att_idx; + att->will_be_resolved = true; + subpass->resolve_depth_stencil = true; + } else { + att->will_be_resolved = false; + } + attachment_set_ops(device, att, VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_LOAD_OP_DONT_CARE, - VK_ATTACHMENT_LOAD_OP_DONT_CARE, - VK_ATTACHMENT_STORE_OP_STORE, - VK_ATTACHMENT_STORE_OP_STORE); - subpass->resolve_attachments[i].attachment = a++; - att->will_be_resolved = true; - subpass->resolve_depth_stencil = true; + store_op, stencil_store_op); + att_is_msrtss = true; + subpass->samples = msrtss->rasterizationSamples; } else { - att->will_be_resolved = false; + att->gmem = true; + att->clear_views = info->viewMask; + attachment_set_ops( + device, att, load_op, stencil_load_op, store_op, + stencil_store_op); + subpass->input_attachments[0].patch_input_gmem = true; + subpass->samples = (VkSampleCountFlagBits) view->image->layout->nr_samples; + } + + if (!att_is_msrtss) { + if (common_info->resolveMode != VK_RESOLVE_MODE_NONE) { + unsigned i = subpass->resolve_count++; + struct tu_render_pass_attachment *resolve_att = &pass->attachments[a]; + VK_FROM_HANDLE(tu_image_view, resolve_view, + common_info->resolveImageView); + tu_setup_dynamic_attachment(resolve_att, resolve_view, + VK_SAMPLE_COUNT_1_BIT); + resolve_att->gmem = false; + attachment_set_ops(device, resolve_att, + VK_ATTACHMENT_LOAD_OP_DONT_CARE, + VK_ATTACHMENT_LOAD_OP_DONT_CARE, + VK_ATTACHMENT_STORE_OP_STORE, + VK_ATTACHMENT_STORE_OP_STORE); + subpass->resolve_attachments[i].attachment = a++; + att->will_be_resolved = true; + subpass->resolve_depth_stencil = true; + } else { + att->will_be_resolved = false; + } } } else { subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED; @@ -1338,7 +1609,10 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer, subpass->input_attachments[0].attachment = VK_ATTACHMENT_UNUSED; } - pass->attachment_count = a; + /* We have to set this early for tu_render_pass_disable_fdm() to work. We + * then set it again after the FDM attachment is added. + */ + pass->user_attachment_count = a; const VkRenderingFragmentDensityMapAttachmentInfoEXT *fdm_info = vk_find_struct_const(info->pNext, @@ -1348,7 +1622,7 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer, VK_FROM_HANDLE(tu_image_view, view, fdm_info->imageView); struct tu_render_pass_attachment *att = &pass->attachments[a]; - tu_setup_dynamic_attachment(att, view); + tu_setup_dynamic_attachment(att, view, VK_SAMPLE_COUNT_1_BIT); pass->fragment_density_map.attachment = a++; attachment_set_ops(device, att, VK_ATTACHMENT_LOAD_OP_DONT_CARE, @@ -1370,7 +1644,7 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer, VK_FROM_HANDLE(tu_image_view, view, fsr_info->imageView); struct tu_render_pass_attachment *att = &pass->attachments[a]; - tu_setup_dynamic_attachment(att, view); + tu_setup_dynamic_attachment(att, view, VK_SAMPLE_COUNT_1_BIT); subpass->fsr_attachment = a++; attachment_set_ops(device, att, VK_ATTACHMENT_LOAD_OP_DONT_CARE, @@ -1385,6 +1659,75 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer, if (TU_DEBUG(FDM) && !tu_render_pass_disable_fdm(device, pass)) pass->has_fdm = true; + pass->user_attachment_count = a; + + /* Setup MSRTSS attachments, which come after user attachments. They + * replace the color and depth/stencil attachments. + */ + if (msrtss && msrtss->multisampledRenderToSingleSampledEnable) { + for (uint32_t i = 0; i < info->colorAttachmentCount; i++) { + const VkRenderingAttachmentInfo *att_info = &info->pColorAttachments[i]; + + if (att_info->imageView == VK_NULL_HANDLE) + continue; + + VK_FROM_HANDLE(tu_image_view, view, att_info->imageView); + + if (msrtss->rasterizationSamples != + (VkSampleCountFlagBits)view->image->layout->nr_samples) { + struct tu_render_pass_attachment *att = &pass->attachments[a]; + tu_setup_dynamic_attachment(att, view, msrtss->rasterizationSamples); + uint32_t att_idx = a++; + + att->gmem = true; + att->clear_views = info->viewMask; + att->user_att = subpass->color_attachments[i].attachment; + VkAttachmentLoadOp load_op = + att_info->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR ? VK_ATTACHMENT_LOAD_OP_CLEAR : + VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachment_set_ops(device, att, load_op, + VK_ATTACHMENT_LOAD_OP_DONT_CARE, + VK_ATTACHMENT_STORE_OP_DONT_CARE, + VK_ATTACHMENT_STORE_OP_DONT_CARE); + subpass->color_attachments[i].attachment = att_idx; + } + } + + if (info->pDepthAttachment || info->pStencilAttachment) { + const struct VkRenderingAttachmentInfo *common_info = + (info->pDepthAttachment && + info->pDepthAttachment->imageView != VK_NULL_HANDLE) ? + info->pDepthAttachment : + info->pStencilAttachment; + + if (common_info && common_info->imageView != VK_NULL_HANDLE) { + VK_FROM_HANDLE(tu_image_view, view, common_info->imageView); + if (msrtss->rasterizationSamples != + (VkSampleCountFlagBits)view->image->layout->nr_samples) { + struct tu_render_pass_attachment *att = &pass->attachments[a]; + tu_setup_dynamic_attachment(att, view, msrtss->rasterizationSamples); + uint32_t att_idx = a++; + + VkAttachmentLoadOp load_op = + (info->pDepthAttachment && info->pDepthAttachment->imageView && + info->pDepthAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) ? + VK_ATTACHMENT_LOAD_OP_CLEAR : VK_ATTACHMENT_LOAD_OP_DONT_CARE; + VkAttachmentLoadOp stencil_load_op = + (info->pStencilAttachment && info->pStencilAttachment->imageView && + info->pStencilAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) ? + VK_ATTACHMENT_LOAD_OP_CLEAR : VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachment_set_ops(device, att, load_op, stencil_load_op, + VK_ATTACHMENT_STORE_OP_DONT_CARE, + VK_ATTACHMENT_STORE_OP_DONT_CARE); + att->gmem = true; + att->clear_views = info->viewMask; + att->user_att = subpass->depth_stencil_attachment.attachment; + subpass->depth_stencil_attachment.attachment = att_idx; + } + } + } + } + pass->attachment_count = a; tu_render_pass_check_ib2_skip(pass); diff --git a/src/freedreno/vulkan/tu_pass.h b/src/freedreno/vulkan/tu_pass.h index 956c045add6..09518ecb894 100644 --- a/src/freedreno/vulkan/tu_pass.h +++ b/src/freedreno/vulkan/tu_pass.h @@ -97,6 +97,14 @@ struct tu_render_pass_attachment uint32_t cpp; VkImageAspectFlags clear_mask; uint32_t clear_views; + /* The internal MSRTSS attachment to clear when the user says to clear + * this attachment. Clear values must be remapped to this attachment. + */ + uint32_t remapped_clear_att; + /* For internal attachments created for MSRTSS, the original user attachment + * which it is resolved/unresolved to. + */ + uint32_t user_att; bool load; bool store; bool gmem; @@ -120,7 +128,7 @@ struct tu_render_pass { struct vk_object_base base; - uint32_t attachment_count; + uint32_t attachment_count, user_attachment_count; uint32_t subpass_count; uint32_t gmem_pixels[TU_GMEM_LAYOUT_COUNT]; uint32_t tile_align_w;