diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index 9b89194fb01..950a874ab9b 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -4713,6 +4713,7 @@ tu_flush_for_access(struct tu_cache_state *cache, SRC_INCOHERENT_FLUSH(CCU_COLOR, CCU_CLEAN_COLOR, CCU_INVALIDATE_COLOR) SRC_INCOHERENT_FLUSH(CCU_DEPTH, CCU_CLEAN_DEPTH, CCU_INVALIDATE_DEPTH) + SRC_INCOHERENT_FLUSH(UCHE, CACHE_CLEAN, CACHE_INVALIDATE) #undef SRC_INCOHERENT_FLUSH @@ -4747,6 +4748,7 @@ tu_flush_for_access(struct tu_cache_state *cache, DST_INCOHERENT_FLUSH(CCU_COLOR, CCU_CLEAN_COLOR, CCU_INVALIDATE_COLOR) DST_INCOHERENT_FLUSH(CCU_DEPTH, CCU_CLEAN_DEPTH, CCU_INVALIDATE_DEPTH) + DST_INCOHERENT_FLUSH(UCHE, CACHE_CLEAN, CACHE_INVALIDATE) if (dst_mask & TU_ACCESS_BINDLESS_DESCRIPTOR_READ) { flush_bits |= TU_CMD_FLAG_BINDLESS_DESCRIPTOR_INVALIDATE; @@ -4841,7 +4843,8 @@ gfx_write_access(VkAccessFlags2 flags, VkPipelineStageFlags2 stages, static enum tu_cmd_access_mask vk2tu_access(VkAccessFlags2 flags, VkAccessFlags3KHR flags2, - VkPipelineStageFlags2 stages, bool image_only, bool gmem) + VkPipelineStageFlags2 stages, bool image_only, bool gmem, + bool sparse_aliasing) { BITMASK_ENUM(tu_cmd_access_mask) mask = 0; @@ -4891,8 +4894,13 @@ vk2tu_access(VkAccessFlags2 flags, VkAccessFlags3KHR flags2, VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT | VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR | VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR | - SHADER_STAGES)) - mask |= TU_ACCESS_UCHE_READ | TU_ACCESS_CCHE_READ; + SHADER_STAGES)) { + if (sparse_aliasing) + mask |= TU_ACCESS_UCHE_INCOHERENT_READ; + else + mask |= TU_ACCESS_UCHE_READ; + mask |= TU_ACCESS_CCHE_READ; + } if (gfx_read_access(flags, stages, VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR, @@ -4912,13 +4920,20 @@ vk2tu_access(VkAccessFlags2 flags, VkAccessFlags3KHR flags2, if (gfx_read_access(flags, stages, VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT, - SHADER_STAGES)) + SHADER_STAGES)) { mask |= TU_ACCESS_UCHE_READ_GMEM; + if (sparse_aliasing) + mask |= TU_ACCESS_UCHE_INCOHERENT_READ; + } if (gfx_read_access(flags, stages, VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT, SHADER_STAGES)) { - mask |= TU_ACCESS_UCHE_READ | TU_ACCESS_BINDLESS_DESCRIPTOR_READ | + if (sparse_aliasing) + mask |= TU_ACCESS_UCHE_INCOHERENT_READ; + else + mask |= TU_ACCESS_UCHE_READ; + mask |= TU_ACCESS_BINDLESS_DESCRIPTOR_READ | TU_ACCESS_CCHE_READ; } @@ -4927,8 +4942,12 @@ vk2tu_access(VkAccessFlags2 flags, VkAccessFlags3KHR flags2, VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT | VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT, VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT | - SHADER_STAGES)) - mask |= TU_ACCESS_UCHE_WRITE; + SHADER_STAGES)) { + if (sparse_aliasing) + mask |= TU_ACCESS_UCHE_INCOHERENT_WRITE; + else + mask |= TU_ACCESS_UCHE_WRITE; + } if (gfx_write_access(flags, stages, VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR, @@ -4992,7 +5011,7 @@ vk2tu_access(VkAccessFlags2 flags, VkAccessFlags3KHR flags2, VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT)) { if (gmem) { mask |= TU_ACCESS_SYSMEM_WRITE; - } else if (image_only) { + } else if (image_only && !sparse_aliasing) { /* Because we always split up blits/copies of images involving * multiple layers, we always access each layer in the same way, with * the same base address, same format, etc. This means we can avoid @@ -5011,7 +5030,11 @@ vk2tu_access(VkAccessFlags2 flags, VkAccessFlags3KHR flags2, VK_PIPELINE_STAGE_2_BLIT_BIT | VK_PIPELINE_STAGE_2_RESOLVE_BIT | VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT)) { - mask |= TU_ACCESS_UCHE_READ | TU_ACCESS_CCHE_READ; + if (sparse_aliasing) + mask |= TU_ACCESS_UCHE_INCOHERENT_READ; + else + mask |= TU_ACCESS_UCHE_READ; + mask |= TU_ACCESS_CCHE_READ; } return mask; @@ -5466,10 +5489,12 @@ tu_subpass_barrier(struct tu_cmd_buffer *cmd_buffer, sanitize_dst_stage(barrier->dst_stage_mask); BITMASK_ENUM(tu_cmd_access_mask) src_flags = vk2tu_access(barrier->src_access_mask, barrier->src_access_mask2, - src_stage_vk, false, false); + src_stage_vk, false, false, + cmd_buffer->device->vk.enabled_features.sparseResidencyAliased); BITMASK_ENUM(tu_cmd_access_mask) dst_flags = vk2tu_access(barrier->dst_access_mask, barrier->dst_access_mask2, - dst_stage_vk, false, false); + dst_stage_vk, false, false, + cmd_buffer->device->vk.enabled_features.sparseResidencyAliased); if (barrier->incoherent_ccu_color) src_flags |= TU_ACCESS_CCU_COLOR_INCOHERENT_WRITE; @@ -8296,9 +8321,11 @@ tu_barrier(struct tu_cmd_buffer *cmd, } src_flags |= vk2tu_access(barrier->srcAccessMask, src_access_mask2, - sanitized_src_stage, false, gmem); + sanitized_src_stage, false, gmem, + cmd->device->vk.enabled_features.sparseResidencyAliased); dst_flags |= vk2tu_access(barrier->dstAccessMask, dst_access_mask2, - sanitized_dst_stage, false, gmem); + sanitized_dst_stage, false, gmem, + cmd->device->vk.enabled_features.sparseResidencyAliased); srcStage |= sanitized_src_stage; dstStage |= sanitized_dst_stage; } @@ -8306,6 +8333,9 @@ tu_barrier(struct tu_cmd_buffer *cmd, for (uint32_t i = 0; i < dep_info->bufferMemoryBarrierCount; i++) { const VkBufferMemoryBarrier2 *barrier = &dep_info->pBufferMemoryBarriers[i]; + VK_FROM_HANDLE(tu_buffer, buffer, barrier->buffer); + bool sparse_aliasing = + buffer->vk.create_flags & VK_BUFFER_CREATE_SPARSE_ALIASED_BIT; VkPipelineStageFlags2 sanitized_src_stage = sanitize_src_stage(barrier->srcStageMask); VkPipelineStageFlags2 sanitized_dst_stage = @@ -8320,9 +8350,11 @@ tu_barrier(struct tu_cmd_buffer *cmd, } src_flags |= vk2tu_access(barrier->srcAccessMask, src_access_mask2, - sanitized_src_stage, false, gmem); + sanitized_src_stage, false, gmem, + sparse_aliasing); dst_flags |= vk2tu_access(barrier->dstAccessMask, dst_access_mask2, - sanitized_dst_stage, false, gmem); + sanitized_dst_stage, false, gmem, + sparse_aliasing); srcStage |= sanitized_src_stage; dstStage |= sanitized_dst_stage; } @@ -8330,8 +8362,12 @@ tu_barrier(struct tu_cmd_buffer *cmd, for (uint32_t i = 0; i < dep_info->imageMemoryBarrierCount; i++) { const VkImageMemoryBarrier2 *barrier = &dep_info->pImageMemoryBarriers[i]; + VK_FROM_HANDLE(tu_image, image, barrier->image); VkImageLayout old_layout = barrier->oldLayout; + + bool sparse_aliasing = + image->vk.create_flags & VK_BUFFER_CREATE_SPARSE_ALIASED_BIT; if (old_layout == VK_IMAGE_LAYOUT_UNDEFINED) { /* The underlying memory for this image may have been used earlier * within the same queue submission for a different image, which @@ -8340,8 +8376,6 @@ tu_barrier(struct tu_cmd_buffer *cmd, * to the image. We don't want these entries being flushed later and * overwriting the actual image, so we need to flush the CCU. */ - VK_FROM_HANDLE(tu_image, image, barrier->image); - if (vk_format_is_depth_or_stencil(image->vk.format)) { src_flags |= TU_ACCESS_CCU_DEPTH_INCOHERENT_WRITE; } else { @@ -8362,9 +8396,11 @@ tu_barrier(struct tu_cmd_buffer *cmd, } src_flags |= vk2tu_access(barrier->srcAccessMask, src_access_mask2, - sanitized_src_stage, true, gmem); + sanitized_src_stage, true, gmem, + sparse_aliasing); dst_flags |= vk2tu_access(barrier->dstAccessMask, dst_access_mask2, - sanitized_dst_stage, true, gmem); + sanitized_dst_stage, true, gmem, + sparse_aliasing); srcStage |= sanitized_src_stage; dstStage |= sanitized_dst_stage; } diff --git a/src/freedreno/vulkan/tu_cmd_buffer.h b/src/freedreno/vulkan/tu_cmd_buffer.h index d180c6094f1..68195da4a9a 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.h +++ b/src/freedreno/vulkan/tu_cmd_buffer.h @@ -151,6 +151,10 @@ enum tu_cmd_access_mask { TU_ACCESS_RTU_READ = 1 << 17, + /* An access through UCHE that must always be flushed/invalidated */ + TU_ACCESS_UCHE_INCOHERENT_READ = 1 << 18, + TU_ACCESS_UCHE_INCOHERENT_WRITE = 1 << 19, + TU_ACCESS_READ = TU_ACCESS_UCHE_READ | TU_ACCESS_CCU_COLOR_READ | @@ -159,7 +163,8 @@ enum tu_cmd_access_mask { TU_ACCESS_CCU_DEPTH_INCOHERENT_READ | TU_ACCESS_SYSMEM_READ | TU_ACCESS_BINDLESS_DESCRIPTOR_READ | - TU_ACCESS_CCHE_READ, + TU_ACCESS_CCHE_READ | + TU_ACCESS_UCHE_INCOHERENT_READ, TU_ACCESS_WRITE = TU_ACCESS_UCHE_WRITE | @@ -168,7 +173,8 @@ enum tu_cmd_access_mask { TU_ACCESS_CCU_DEPTH_WRITE | TU_ACCESS_CCU_DEPTH_INCOHERENT_WRITE | TU_ACCESS_SYSMEM_WRITE | - TU_ACCESS_CP_WRITE, + TU_ACCESS_CP_WRITE | + TU_ACCESS_UCHE_INCOHERENT_WRITE, TU_ACCESS_ALL = TU_ACCESS_READ | diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index fe329cf62bc..d392f52113e 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -409,6 +409,7 @@ tu_get_features(struct tu_physical_device *pdevice, features->shaderInt16 = true; features->sparseBinding = pdevice->has_sparse; features->sparseResidencyBuffer = pdevice->has_sparse_prr; + features->sparseResidencyAliased = pdevice->has_sparse_prr; features->variableMultisampleRate = true; features->inheritedQueries = true;