mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-21 04:20:24 +01:00
tu: Support sparseResidencyAliased
UCHE and CCU use virtual-tagged addresses, so whenever an alias may have changed we have to always flush and invalidate everything. We detect this through the sparse memory aliasing flag on the buffer/image, or for plain memory barriers whether the feature is enabled. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32533>
This commit is contained in:
parent
8feed47fce
commit
4d2c14847f
3 changed files with 64 additions and 21 deletions
|
|
@ -4713,6 +4713,7 @@ tu_flush_for_access(struct tu_cache_state *cache,
|
|||
|
||||
SRC_INCOHERENT_FLUSH(CCU_COLOR, CCU_CLEAN_COLOR, CCU_INVALIDATE_COLOR)
|
||||
SRC_INCOHERENT_FLUSH(CCU_DEPTH, CCU_CLEAN_DEPTH, CCU_INVALIDATE_DEPTH)
|
||||
SRC_INCOHERENT_FLUSH(UCHE, CACHE_CLEAN, CACHE_INVALIDATE)
|
||||
|
||||
#undef SRC_INCOHERENT_FLUSH
|
||||
|
||||
|
|
@ -4747,6 +4748,7 @@ tu_flush_for_access(struct tu_cache_state *cache,
|
|||
|
||||
DST_INCOHERENT_FLUSH(CCU_COLOR, CCU_CLEAN_COLOR, CCU_INVALIDATE_COLOR)
|
||||
DST_INCOHERENT_FLUSH(CCU_DEPTH, CCU_CLEAN_DEPTH, CCU_INVALIDATE_DEPTH)
|
||||
DST_INCOHERENT_FLUSH(UCHE, CACHE_CLEAN, CACHE_INVALIDATE)
|
||||
|
||||
if (dst_mask & TU_ACCESS_BINDLESS_DESCRIPTOR_READ) {
|
||||
flush_bits |= TU_CMD_FLAG_BINDLESS_DESCRIPTOR_INVALIDATE;
|
||||
|
|
@ -4841,7 +4843,8 @@ gfx_write_access(VkAccessFlags2 flags, VkPipelineStageFlags2 stages,
|
|||
|
||||
static enum tu_cmd_access_mask
|
||||
vk2tu_access(VkAccessFlags2 flags, VkAccessFlags3KHR flags2,
|
||||
VkPipelineStageFlags2 stages, bool image_only, bool gmem)
|
||||
VkPipelineStageFlags2 stages, bool image_only, bool gmem,
|
||||
bool sparse_aliasing)
|
||||
{
|
||||
BITMASK_ENUM(tu_cmd_access_mask) mask = 0;
|
||||
|
||||
|
|
@ -4891,8 +4894,13 @@ vk2tu_access(VkAccessFlags2 flags, VkAccessFlags3KHR flags2,
|
|||
VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT |
|
||||
VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR |
|
||||
VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR |
|
||||
SHADER_STAGES))
|
||||
mask |= TU_ACCESS_UCHE_READ | TU_ACCESS_CCHE_READ;
|
||||
SHADER_STAGES)) {
|
||||
if (sparse_aliasing)
|
||||
mask |= TU_ACCESS_UCHE_INCOHERENT_READ;
|
||||
else
|
||||
mask |= TU_ACCESS_UCHE_READ;
|
||||
mask |= TU_ACCESS_CCHE_READ;
|
||||
}
|
||||
|
||||
if (gfx_read_access(flags, stages,
|
||||
VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR,
|
||||
|
|
@ -4912,13 +4920,20 @@ vk2tu_access(VkAccessFlags2 flags, VkAccessFlags3KHR flags2,
|
|||
|
||||
if (gfx_read_access(flags, stages,
|
||||
VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT,
|
||||
SHADER_STAGES))
|
||||
SHADER_STAGES)) {
|
||||
mask |= TU_ACCESS_UCHE_READ_GMEM;
|
||||
if (sparse_aliasing)
|
||||
mask |= TU_ACCESS_UCHE_INCOHERENT_READ;
|
||||
}
|
||||
|
||||
if (gfx_read_access(flags, stages,
|
||||
VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT,
|
||||
SHADER_STAGES)) {
|
||||
mask |= TU_ACCESS_UCHE_READ | TU_ACCESS_BINDLESS_DESCRIPTOR_READ |
|
||||
if (sparse_aliasing)
|
||||
mask |= TU_ACCESS_UCHE_INCOHERENT_READ;
|
||||
else
|
||||
mask |= TU_ACCESS_UCHE_READ;
|
||||
mask |= TU_ACCESS_BINDLESS_DESCRIPTOR_READ |
|
||||
TU_ACCESS_CCHE_READ;
|
||||
}
|
||||
|
||||
|
|
@ -4927,8 +4942,12 @@ vk2tu_access(VkAccessFlags2 flags, VkAccessFlags3KHR flags2,
|
|||
VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT |
|
||||
VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT,
|
||||
VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT |
|
||||
SHADER_STAGES))
|
||||
mask |= TU_ACCESS_UCHE_WRITE;
|
||||
SHADER_STAGES)) {
|
||||
if (sparse_aliasing)
|
||||
mask |= TU_ACCESS_UCHE_INCOHERENT_WRITE;
|
||||
else
|
||||
mask |= TU_ACCESS_UCHE_WRITE;
|
||||
}
|
||||
|
||||
if (gfx_write_access(flags, stages,
|
||||
VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
|
||||
|
|
@ -4992,7 +5011,7 @@ vk2tu_access(VkAccessFlags2 flags, VkAccessFlags3KHR flags2,
|
|||
VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT)) {
|
||||
if (gmem) {
|
||||
mask |= TU_ACCESS_SYSMEM_WRITE;
|
||||
} else if (image_only) {
|
||||
} else if (image_only && !sparse_aliasing) {
|
||||
/* Because we always split up blits/copies of images involving
|
||||
* multiple layers, we always access each layer in the same way, with
|
||||
* the same base address, same format, etc. This means we can avoid
|
||||
|
|
@ -5011,7 +5030,11 @@ vk2tu_access(VkAccessFlags2 flags, VkAccessFlags3KHR flags2,
|
|||
VK_PIPELINE_STAGE_2_BLIT_BIT |
|
||||
VK_PIPELINE_STAGE_2_RESOLVE_BIT |
|
||||
VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT)) {
|
||||
mask |= TU_ACCESS_UCHE_READ | TU_ACCESS_CCHE_READ;
|
||||
if (sparse_aliasing)
|
||||
mask |= TU_ACCESS_UCHE_INCOHERENT_READ;
|
||||
else
|
||||
mask |= TU_ACCESS_UCHE_READ;
|
||||
mask |= TU_ACCESS_CCHE_READ;
|
||||
}
|
||||
|
||||
return mask;
|
||||
|
|
@ -5466,10 +5489,12 @@ tu_subpass_barrier(struct tu_cmd_buffer *cmd_buffer,
|
|||
sanitize_dst_stage(barrier->dst_stage_mask);
|
||||
BITMASK_ENUM(tu_cmd_access_mask) src_flags =
|
||||
vk2tu_access(barrier->src_access_mask, barrier->src_access_mask2,
|
||||
src_stage_vk, false, false);
|
||||
src_stage_vk, false, false,
|
||||
cmd_buffer->device->vk.enabled_features.sparseResidencyAliased);
|
||||
BITMASK_ENUM(tu_cmd_access_mask) dst_flags =
|
||||
vk2tu_access(barrier->dst_access_mask, barrier->dst_access_mask2,
|
||||
dst_stage_vk, false, false);
|
||||
dst_stage_vk, false, false,
|
||||
cmd_buffer->device->vk.enabled_features.sparseResidencyAliased);
|
||||
|
||||
if (barrier->incoherent_ccu_color)
|
||||
src_flags |= TU_ACCESS_CCU_COLOR_INCOHERENT_WRITE;
|
||||
|
|
@ -8296,9 +8321,11 @@ tu_barrier(struct tu_cmd_buffer *cmd,
|
|||
}
|
||||
|
||||
src_flags |= vk2tu_access(barrier->srcAccessMask, src_access_mask2,
|
||||
sanitized_src_stage, false, gmem);
|
||||
sanitized_src_stage, false, gmem,
|
||||
cmd->device->vk.enabled_features.sparseResidencyAliased);
|
||||
dst_flags |= vk2tu_access(barrier->dstAccessMask, dst_access_mask2,
|
||||
sanitized_dst_stage, false, gmem);
|
||||
sanitized_dst_stage, false, gmem,
|
||||
cmd->device->vk.enabled_features.sparseResidencyAliased);
|
||||
srcStage |= sanitized_src_stage;
|
||||
dstStage |= sanitized_dst_stage;
|
||||
}
|
||||
|
|
@ -8306,6 +8333,9 @@ tu_barrier(struct tu_cmd_buffer *cmd,
|
|||
for (uint32_t i = 0; i < dep_info->bufferMemoryBarrierCount; i++) {
|
||||
const VkBufferMemoryBarrier2 *barrier =
|
||||
&dep_info->pBufferMemoryBarriers[i];
|
||||
VK_FROM_HANDLE(tu_buffer, buffer, barrier->buffer);
|
||||
bool sparse_aliasing =
|
||||
buffer->vk.create_flags & VK_BUFFER_CREATE_SPARSE_ALIASED_BIT;
|
||||
VkPipelineStageFlags2 sanitized_src_stage =
|
||||
sanitize_src_stage(barrier->srcStageMask);
|
||||
VkPipelineStageFlags2 sanitized_dst_stage =
|
||||
|
|
@ -8320,9 +8350,11 @@ tu_barrier(struct tu_cmd_buffer *cmd,
|
|||
}
|
||||
|
||||
src_flags |= vk2tu_access(barrier->srcAccessMask, src_access_mask2,
|
||||
sanitized_src_stage, false, gmem);
|
||||
sanitized_src_stage, false, gmem,
|
||||
sparse_aliasing);
|
||||
dst_flags |= vk2tu_access(barrier->dstAccessMask, dst_access_mask2,
|
||||
sanitized_dst_stage, false, gmem);
|
||||
sanitized_dst_stage, false, gmem,
|
||||
sparse_aliasing);
|
||||
srcStage |= sanitized_src_stage;
|
||||
dstStage |= sanitized_dst_stage;
|
||||
}
|
||||
|
|
@ -8330,8 +8362,12 @@ tu_barrier(struct tu_cmd_buffer *cmd,
|
|||
for (uint32_t i = 0; i < dep_info->imageMemoryBarrierCount; i++) {
|
||||
const VkImageMemoryBarrier2 *barrier =
|
||||
&dep_info->pImageMemoryBarriers[i];
|
||||
VK_FROM_HANDLE(tu_image, image, barrier->image);
|
||||
|
||||
VkImageLayout old_layout = barrier->oldLayout;
|
||||
|
||||
bool sparse_aliasing =
|
||||
image->vk.create_flags & VK_BUFFER_CREATE_SPARSE_ALIASED_BIT;
|
||||
if (old_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
|
||||
/* The underlying memory for this image may have been used earlier
|
||||
* within the same queue submission for a different image, which
|
||||
|
|
@ -8340,8 +8376,6 @@ tu_barrier(struct tu_cmd_buffer *cmd,
|
|||
* to the image. We don't want these entries being flushed later and
|
||||
* overwriting the actual image, so we need to flush the CCU.
|
||||
*/
|
||||
VK_FROM_HANDLE(tu_image, image, barrier->image);
|
||||
|
||||
if (vk_format_is_depth_or_stencil(image->vk.format)) {
|
||||
src_flags |= TU_ACCESS_CCU_DEPTH_INCOHERENT_WRITE;
|
||||
} else {
|
||||
|
|
@ -8362,9 +8396,11 @@ tu_barrier(struct tu_cmd_buffer *cmd,
|
|||
}
|
||||
|
||||
src_flags |= vk2tu_access(barrier->srcAccessMask, src_access_mask2,
|
||||
sanitized_src_stage, true, gmem);
|
||||
sanitized_src_stage, true, gmem,
|
||||
sparse_aliasing);
|
||||
dst_flags |= vk2tu_access(barrier->dstAccessMask, dst_access_mask2,
|
||||
sanitized_dst_stage, true, gmem);
|
||||
sanitized_dst_stage, true, gmem,
|
||||
sparse_aliasing);
|
||||
srcStage |= sanitized_src_stage;
|
||||
dstStage |= sanitized_dst_stage;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -151,6 +151,10 @@ enum tu_cmd_access_mask {
|
|||
|
||||
TU_ACCESS_RTU_READ = 1 << 17,
|
||||
|
||||
/* An access through UCHE that must always be flushed/invalidated */
|
||||
TU_ACCESS_UCHE_INCOHERENT_READ = 1 << 18,
|
||||
TU_ACCESS_UCHE_INCOHERENT_WRITE = 1 << 19,
|
||||
|
||||
TU_ACCESS_READ =
|
||||
TU_ACCESS_UCHE_READ |
|
||||
TU_ACCESS_CCU_COLOR_READ |
|
||||
|
|
@ -159,7 +163,8 @@ enum tu_cmd_access_mask {
|
|||
TU_ACCESS_CCU_DEPTH_INCOHERENT_READ |
|
||||
TU_ACCESS_SYSMEM_READ |
|
||||
TU_ACCESS_BINDLESS_DESCRIPTOR_READ |
|
||||
TU_ACCESS_CCHE_READ,
|
||||
TU_ACCESS_CCHE_READ |
|
||||
TU_ACCESS_UCHE_INCOHERENT_READ,
|
||||
|
||||
TU_ACCESS_WRITE =
|
||||
TU_ACCESS_UCHE_WRITE |
|
||||
|
|
@ -168,7 +173,8 @@ enum tu_cmd_access_mask {
|
|||
TU_ACCESS_CCU_DEPTH_WRITE |
|
||||
TU_ACCESS_CCU_DEPTH_INCOHERENT_WRITE |
|
||||
TU_ACCESS_SYSMEM_WRITE |
|
||||
TU_ACCESS_CP_WRITE,
|
||||
TU_ACCESS_CP_WRITE |
|
||||
TU_ACCESS_UCHE_INCOHERENT_WRITE,
|
||||
|
||||
TU_ACCESS_ALL =
|
||||
TU_ACCESS_READ |
|
||||
|
|
|
|||
|
|
@ -409,6 +409,7 @@ tu_get_features(struct tu_physical_device *pdevice,
|
|||
features->shaderInt16 = true;
|
||||
features->sparseBinding = pdevice->has_sparse;
|
||||
features->sparseResidencyBuffer = pdevice->has_sparse_prr;
|
||||
features->sparseResidencyAliased = pdevice->has_sparse_prr;
|
||||
features->variableMultisampleRate = true;
|
||||
features->inheritedQueries = true;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue