tu: Implement CCHE invalidation

We need to invalidate the CCHE when we optimize out an invalidation of UCHE,
for example a storage image write to texture read. We missed this
earlier because of the blob's tendency to always over-flush, but the
blob does use this when building acceleration structures.

Fixes: 95104707f1 ("tu: Basic a7xx support")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28445>
This commit is contained in:
Connor Abbott 2024-03-25 13:00:34 -04:00 committed by Marge Bot
parent abe9bd38ff
commit fb1c3f7f5d
2 changed files with 30 additions and 8 deletions

View file

@ -203,6 +203,10 @@ tu6_emit_flushes(struct tu_cmd_buffer *cmd_buffer,
* via UCHE. This isn't necessary on A6XX, all writes should be visible implictly.
*/
tu_emit_event_write<CHIP>(cmd_buffer, cs, FD_CCU_FLUSH_BLIT_CACHE);
if (CHIP >= A7XX && (flushes & TU_CMD_FLAG_CCHE_INVALIDATE) &&
/* Invalidating UCHE seems to also invalidate CCHE */
!(flushes & TU_CMD_FLAG_CACHE_INVALIDATE))
tu_cs_emit_pkt7(cs, CP_CCHE_INVALIDATE, 0);
if (flushes & TU_CMD_FLAG_WAIT_MEM_WRITES)
tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
if (flushes & TU_CMD_FLAG_WAIT_FOR_IDLE)
@ -3365,6 +3369,13 @@ tu_flush_for_access(struct tu_cache_state *cache,
flush_bits |= TU_CMD_FLAG_BINDLESS_DESCRIPTOR_INVALIDATE;
}
/* There are multiple incoherent copies of CCHE, so any read through it may
* require invalidating it and we cannot optimize away invalidates.
*/
if (dst_mask & TU_ACCESS_CCHE_READ) {
flush_bits |= TU_CMD_FLAG_CCHE_INVALIDATE;
}
/* The blit cache is a special case dependency between CP_EVENT_WRITE::BLIT
* (from GMEM loads/clears) to any GMEM attachment reads done via the UCHE
* (Eg: Input attachments/CP_BLIT) which needs an explicit BLIT_CACHE_FLUSH
@ -3484,7 +3495,7 @@ vk2tu_access(VkAccessFlags2 flags, VkPipelineStageFlags2 stages, bool image_only
VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT |
VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT |
SHADER_STAGES))
mask |= TU_ACCESS_UCHE_READ;
mask |= TU_ACCESS_UCHE_READ | TU_ACCESS_CCHE_READ;
if (gfx_read_access(flags, stages,
VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT,
@ -3494,7 +3505,8 @@ vk2tu_access(VkAccessFlags2 flags, VkPipelineStageFlags2 stages, bool image_only
if (gfx_read_access(flags, stages,
VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT,
SHADER_STAGES)) {
mask |= TU_ACCESS_UCHE_READ | TU_ACCESS_BINDLESS_DESCRIPTOR_READ;
mask |= TU_ACCESS_UCHE_READ | TU_ACCESS_BINDLESS_DESCRIPTOR_READ |
TU_ACCESS_CCHE_READ;
}
if (gfx_write_access(flags, stages,

View file

@ -138,6 +138,13 @@ enum tu_cmd_access_mask {
/* Similar to UCHE_READ, but specifically for GMEM attachment reads. */
TU_ACCESS_UCHE_READ_GMEM = 1 << 15,
/* The CCHE is a write-through cache which sits behind UCHE, with multiple
* incoherent copies. Because it's write-through we only have to worry
* about invalidating it for reads. It's invalidated by "ccinv" in the
* shader and CP_CCHE_INVALIDATE in the command stream.
*/
TU_ACCESS_CCHE_READ = 1 << 16,
TU_ACCESS_READ =
TU_ACCESS_UCHE_READ |
TU_ACCESS_CCU_COLOR_READ |
@ -145,7 +152,8 @@ enum tu_cmd_access_mask {
TU_ACCESS_CCU_COLOR_INCOHERENT_READ |
TU_ACCESS_CCU_DEPTH_INCOHERENT_READ |
TU_ACCESS_SYSMEM_READ |
TU_ACCESS_BINDLESS_DESCRIPTOR_READ,
TU_ACCESS_BINDLESS_DESCRIPTOR_READ |
TU_ACCESS_CCHE_READ,
TU_ACCESS_WRITE =
TU_ACCESS_UCHE_WRITE |
@ -192,14 +200,15 @@ enum tu_cmd_flush_bits {
TU_CMD_FLAG_CCU_INVALIDATE_COLOR = 1 << 3,
TU_CMD_FLAG_CACHE_FLUSH = 1 << 4,
TU_CMD_FLAG_CACHE_INVALIDATE = 1 << 5,
TU_CMD_FLAG_WAIT_MEM_WRITES = 1 << 6,
TU_CMD_FLAG_WAIT_FOR_IDLE = 1 << 7,
TU_CMD_FLAG_WAIT_FOR_ME = 1 << 8,
TU_CMD_FLAG_BINDLESS_DESCRIPTOR_INVALIDATE = 1 << 9,
TU_CMD_FLAG_CCHE_INVALIDATE = 1 << 6,
TU_CMD_FLAG_WAIT_MEM_WRITES = 1 << 7,
TU_CMD_FLAG_WAIT_FOR_IDLE = 1 << 8,
TU_CMD_FLAG_WAIT_FOR_ME = 1 << 9,
TU_CMD_FLAG_BINDLESS_DESCRIPTOR_INVALIDATE = 1 << 10,
/* This is an unusual flush that isn't automatically executed if pending,
* as it isn't necessary. Therefore, it's not included in ALL_FLUSH.
*/
TU_CMD_FLAG_BLIT_CACHE_FLUSH = 1 << 10,
TU_CMD_FLAG_BLIT_CACHE_FLUSH = 1 << 11,
TU_CMD_FLAG_ALL_FLUSH =
TU_CMD_FLAG_CCU_FLUSH_DEPTH |
@ -215,6 +224,7 @@ enum tu_cmd_flush_bits {
TU_CMD_FLAG_CCU_INVALIDATE_COLOR |
TU_CMD_FLAG_CACHE_INVALIDATE |
TU_CMD_FLAG_BINDLESS_DESCRIPTOR_INVALIDATE |
TU_CMD_FLAG_CCHE_INVALIDATE |
/* Treat CP_WAIT_FOR_ME as a "cache" that needs to be invalidated when a
* a command that needs CP_WAIT_FOR_ME is executed. This means we may
* insert an extra WAIT_FOR_ME before an indirect command requiring it