tu: Support sparseResidencyAliased

UCHE and CCU are virtually tagged, so whenever an alias may have changed
we always have to flush and invalidate everything. We detect this via the
sparse-aliased create flag on the buffer/image or, for plain memory
barriers, via whether the feature is enabled.
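Condensed, that detection looks roughly like the sketch below. The helper name
and its boolean parameters are hypothetical; the real checks live inline in
tu_barrier() and tu_subpass_barrier() in the diff that follows.

#include <stdbool.h>
#include <vulkan/vulkan.h>

/* Hypothetical helper condensing the detection described above. */
static bool
barrier_may_alias(bool has_buffer, VkBufferCreateFlags buf_flags,
                  bool has_image, VkImageCreateFlags img_flags,
                  bool feature_enabled)
{
   if (has_buffer)   /* VkBufferMemoryBarrier2 */
      return buf_flags & VK_BUFFER_CREATE_SPARSE_ALIASED_BIT;
   if (has_image)    /* VkImageMemoryBarrier2 */
      return img_flags & VK_IMAGE_CREATE_SPARSE_ALIASED_BIT;
   /* Plain VkMemoryBarrier2: no resource to inspect, so be conservative
    * whenever the feature is enabled at all. */
   return feature_enabled;
}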

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32533>
Connor Abbott 2024-12-06 14:08:03 -05:00, committed by Marge Bot
parent 8feed47fce
commit 4d2c14847f
3 changed files with 64 additions and 21 deletions


@@ -4713,6 +4713,7 @@ tu_flush_for_access(struct tu_cache_state *cache,
SRC_INCOHERENT_FLUSH(CCU_COLOR, CCU_CLEAN_COLOR, CCU_INVALIDATE_COLOR)
SRC_INCOHERENT_FLUSH(CCU_DEPTH, CCU_CLEAN_DEPTH, CCU_INVALIDATE_DEPTH)
SRC_INCOHERENT_FLUSH(UCHE, CACHE_CLEAN, CACHE_INVALIDATE)
#undef SRC_INCOHERENT_FLUSH
@@ -4747,6 +4748,7 @@ tu_flush_for_access(struct tu_cache_state *cache,
DST_INCOHERENT_FLUSH(CCU_COLOR, CCU_CLEAN_COLOR, CCU_INVALIDATE_COLOR)
DST_INCOHERENT_FLUSH(CCU_DEPTH, CCU_CLEAN_DEPTH, CCU_INVALIDATE_DEPTH)
DST_INCOHERENT_FLUSH(UCHE, CACHE_CLEAN, CACHE_INVALIDATE)
if (dst_mask & TU_ACCESS_BINDLESS_DESCRIPTOR_READ) {
flush_bits |= TU_CMD_FLAG_BINDLESS_DESCRIPTOR_INVALIDATE;
@@ -4841,7 +4843,8 @@ gfx_write_access(VkAccessFlags2 flags, VkPipelineStageFlags2 stages,
static enum tu_cmd_access_mask
vk2tu_access(VkAccessFlags2 flags, VkAccessFlags3KHR flags2,
VkPipelineStageFlags2 stages, bool image_only, bool gmem)
VkPipelineStageFlags2 stages, bool image_only, bool gmem,
bool sparse_aliasing)
{
BITMASK_ENUM(tu_cmd_access_mask) mask = 0;
@@ -4891,8 +4894,13 @@ vk2tu_access(VkAccessFlags2 flags, VkAccessFlags3KHR flags2,
VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT |
VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR |
VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR |
SHADER_STAGES))
mask |= TU_ACCESS_UCHE_READ | TU_ACCESS_CCHE_READ;
SHADER_STAGES)) {
if (sparse_aliasing)
mask |= TU_ACCESS_UCHE_INCOHERENT_READ;
else
mask |= TU_ACCESS_UCHE_READ;
mask |= TU_ACCESS_CCHE_READ;
}
if (gfx_read_access(flags, stages,
VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR,
@@ -4912,13 +4920,20 @@ vk2tu_access(VkAccessFlags2 flags, VkAccessFlags3KHR flags2,
if (gfx_read_access(flags, stages,
VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT,
SHADER_STAGES))
SHADER_STAGES)) {
mask |= TU_ACCESS_UCHE_READ_GMEM;
if (sparse_aliasing)
mask |= TU_ACCESS_UCHE_INCOHERENT_READ;
}
if (gfx_read_access(flags, stages,
VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT,
SHADER_STAGES)) {
mask |= TU_ACCESS_UCHE_READ | TU_ACCESS_BINDLESS_DESCRIPTOR_READ |
if (sparse_aliasing)
mask |= TU_ACCESS_UCHE_INCOHERENT_READ;
else
mask |= TU_ACCESS_UCHE_READ;
mask |= TU_ACCESS_BINDLESS_DESCRIPTOR_READ |
TU_ACCESS_CCHE_READ;
}
@@ -4927,8 +4942,12 @@ vk2tu_access(VkAccessFlags2 flags, VkAccessFlags3KHR flags2,
VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT |
VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT,
VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT |
SHADER_STAGES))
mask |= TU_ACCESS_UCHE_WRITE;
SHADER_STAGES)) {
if (sparse_aliasing)
mask |= TU_ACCESS_UCHE_INCOHERENT_WRITE;
else
mask |= TU_ACCESS_UCHE_WRITE;
}
if (gfx_write_access(flags, stages,
VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
@@ -4992,7 +5011,7 @@ vk2tu_access(VkAccessFlags2 flags, VkAccessFlags3KHR flags2,
VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT)) {
if (gmem) {
mask |= TU_ACCESS_SYSMEM_WRITE;
} else if (image_only) {
} else if (image_only && !sparse_aliasing) {
/* Because we always split up blits/copies of images involving
* multiple layers, we always access each layer in the same way, with
* the same base address, same format, etc. This means we can avoid
@@ -5011,7 +5030,11 @@ vk2tu_access(VkAccessFlags2 flags, VkAccessFlags3KHR flags2,
VK_PIPELINE_STAGE_2_BLIT_BIT |
VK_PIPELINE_STAGE_2_RESOLVE_BIT |
VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT)) {
mask |= TU_ACCESS_UCHE_READ | TU_ACCESS_CCHE_READ;
if (sparse_aliasing)
mask |= TU_ACCESS_UCHE_INCOHERENT_READ;
else
mask |= TU_ACCESS_UCHE_READ;
mask |= TU_ACCESS_CCHE_READ;
}
return mask;
@@ -5466,10 +5489,12 @@ tu_subpass_barrier(struct tu_cmd_buffer *cmd_buffer,
sanitize_dst_stage(barrier->dst_stage_mask);
BITMASK_ENUM(tu_cmd_access_mask) src_flags =
vk2tu_access(barrier->src_access_mask, barrier->src_access_mask2,
src_stage_vk, false, false);
src_stage_vk, false, false,
cmd_buffer->device->vk.enabled_features.sparseResidencyAliased);
BITMASK_ENUM(tu_cmd_access_mask) dst_flags =
vk2tu_access(barrier->dst_access_mask, barrier->dst_access_mask2,
dst_stage_vk, false, false);
dst_stage_vk, false, false,
cmd_buffer->device->vk.enabled_features.sparseResidencyAliased);
if (barrier->incoherent_ccu_color)
src_flags |= TU_ACCESS_CCU_COLOR_INCOHERENT_WRITE;
@@ -8296,9 +8321,11 @@ tu_barrier(struct tu_cmd_buffer *cmd,
}
src_flags |= vk2tu_access(barrier->srcAccessMask, src_access_mask2,
sanitized_src_stage, false, gmem);
sanitized_src_stage, false, gmem,
cmd->device->vk.enabled_features.sparseResidencyAliased);
dst_flags |= vk2tu_access(barrier->dstAccessMask, dst_access_mask2,
sanitized_dst_stage, false, gmem);
sanitized_dst_stage, false, gmem,
cmd->device->vk.enabled_features.sparseResidencyAliased);
srcStage |= sanitized_src_stage;
dstStage |= sanitized_dst_stage;
}
@@ -8306,6 +8333,9 @@ tu_barrier(struct tu_cmd_buffer *cmd,
for (uint32_t i = 0; i < dep_info->bufferMemoryBarrierCount; i++) {
const VkBufferMemoryBarrier2 *barrier =
&dep_info->pBufferMemoryBarriers[i];
VK_FROM_HANDLE(tu_buffer, buffer, barrier->buffer);
bool sparse_aliasing =
buffer->vk.create_flags & VK_BUFFER_CREATE_SPARSE_ALIASED_BIT;
VkPipelineStageFlags2 sanitized_src_stage =
sanitize_src_stage(barrier->srcStageMask);
VkPipelineStageFlags2 sanitized_dst_stage =
@@ -8320,9 +8350,11 @@ tu_barrier(struct tu_cmd_buffer *cmd,
}
src_flags |= vk2tu_access(barrier->srcAccessMask, src_access_mask2,
sanitized_src_stage, false, gmem);
sanitized_src_stage, false, gmem,
sparse_aliasing);
dst_flags |= vk2tu_access(barrier->dstAccessMask, dst_access_mask2,
sanitized_dst_stage, false, gmem);
sanitized_dst_stage, false, gmem,
sparse_aliasing);
srcStage |= sanitized_src_stage;
dstStage |= sanitized_dst_stage;
}
@@ -8330,8 +8362,12 @@ tu_barrier(struct tu_cmd_buffer *cmd,
for (uint32_t i = 0; i < dep_info->imageMemoryBarrierCount; i++) {
const VkImageMemoryBarrier2 *barrier =
&dep_info->pImageMemoryBarriers[i];
VK_FROM_HANDLE(tu_image, image, barrier->image);
VkImageLayout old_layout = barrier->oldLayout;
bool sparse_aliasing =
image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_ALIASED_BIT;
if (old_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
/* The underlying memory for this image may have been used earlier
* within the same queue submission for a different image, which
@@ -8340,8 +8376,6 @@ tu_barrier(struct tu_cmd_buffer *cmd,
* to the image. We don't want these entries being flushed later and
* overwriting the actual image, so we need to flush the CCU.
*/
VK_FROM_HANDLE(tu_image, image, barrier->image);
if (vk_format_is_depth_or_stencil(image->vk.format)) {
src_flags |= TU_ACCESS_CCU_DEPTH_INCOHERENT_WRITE;
} else {
@@ -8362,9 +8396,11 @@ tu_barrier(struct tu_cmd_buffer *cmd,
}
src_flags |= vk2tu_access(barrier->srcAccessMask, src_access_mask2,
sanitized_src_stage, true, gmem);
sanitized_src_stage, true, gmem,
sparse_aliasing);
dst_flags |= vk2tu_access(barrier->dstAccessMask, dst_access_mask2,
sanitized_dst_stage, true, gmem);
sanitized_dst_stage, true, gmem,
sparse_aliasing);
srcStage |= sanitized_src_stage;
dstStage |= sanitized_dst_stage;
}


@@ -151,6 +151,10 @@ enum tu_cmd_access_mask {
TU_ACCESS_RTU_READ = 1 << 17,
/* An access through UCHE that must always be flushed/invalidated */
TU_ACCESS_UCHE_INCOHERENT_READ = 1 << 18,
TU_ACCESS_UCHE_INCOHERENT_WRITE = 1 << 19,
TU_ACCESS_READ =
TU_ACCESS_UCHE_READ |
TU_ACCESS_CCU_COLOR_READ |
@@ -159,7 +163,8 @@ enum tu_cmd_access_mask {
TU_ACCESS_CCU_DEPTH_INCOHERENT_READ |
TU_ACCESS_SYSMEM_READ |
TU_ACCESS_BINDLESS_DESCRIPTOR_READ |
TU_ACCESS_CCHE_READ,
TU_ACCESS_CCHE_READ |
TU_ACCESS_UCHE_INCOHERENT_READ,
TU_ACCESS_WRITE =
TU_ACCESS_UCHE_WRITE |
@@ -168,7 +173,8 @@ enum tu_cmd_access_mask {
TU_ACCESS_CCU_DEPTH_WRITE |
TU_ACCESS_CCU_DEPTH_INCOHERENT_WRITE |
TU_ACCESS_SYSMEM_WRITE |
TU_ACCESS_CP_WRITE,
TU_ACCESS_CP_WRITE |
TU_ACCESS_UCHE_INCOHERENT_WRITE,
TU_ACCESS_ALL =
TU_ACCESS_READ |


@@ -409,6 +409,7 @@ tu_get_features(struct tu_physical_device *pdevice,
features->shaderInt16 = true;
features->sparseBinding = pdevice->has_sparse;
features->sparseResidencyBuffer = pdevice->has_sparse_prr;
features->sparseResidencyAliased = pdevice->has_sparse_prr;
features->variableMultisampleRate = true;
features->inheritedQueries = true;
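For context (not part of this commit), an application would detect and enable
the newly exposed feature roughly as in the sketch below; the helper name is
made up and error handling is omitted.

#include <stdbool.h>
#include <vulkan/vulkan.h>

/* Query sparse-aliasing support and request it for device creation. A buffer
 * that actually aliases memory must then be created with
 * VK_BUFFER_CREATE_SPARSE_ALIASED_BIT, which also requires
 * VK_BUFFER_CREATE_SPARSE_BINDING_BIT to be set. */
static bool
fill_sparse_aliasing_features(VkPhysicalDevice pdev,
                              VkPhysicalDeviceFeatures *enabled)
{
   VkPhysicalDeviceFeatures supported;
   vkGetPhysicalDeviceFeatures(pdev, &supported);

   if (!supported.sparseBinding || !supported.sparseResidencyBuffer ||
       !supported.sparseResidencyAliased)
      return false;

   enabled->sparseBinding = VK_TRUE;
   enabled->sparseResidencyBuffer = VK_TRUE;
   enabled->sparseResidencyAliased = VK_TRUE;
   return true;
}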