tu: Use a7xx terminology for flushes

a7xx renamed events around flushing:

a6xx              a7xx
FLUSH             CLEAN
INVALIDATE        INVALIDATE
FLUSH+INVALIDATE  FLUSH

The FLUSH events kept their names, but they now also invalidate. By not
adopting the new CLEAN events, we're inadvertently invalidating too
much.

This change is just a refactor that makes generic code consistently use
the a7xx terminology. The next commit will actually make us use CLEAN.

Note that LRZ_FLUSH is deliberately not changed because it actually
also invalidates (and the real name on a6xx was FLUSH_AND_INVALIDATE).

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29824>
Connor Abbott 2024-06-20 12:56:19 -04:00 committed by Marge Bot
parent 0e220cd45a
commit c7284c94ef
5 changed files with 95 additions and 83 deletions
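To make the renamed semantics concrete, here is a minimal, hypothetical model of a single cache line (illustration only, not turnip code): a CLEAN writes a dirty line back to memory but keeps it resident, an INVALIDATE drops the line so the next read refetches it, and an a7xx FLUSH does both. Emitting FLUSH where only a CLEAN is required therefore discards lines that could still have been served from the cache, which is the over-invalidation the commit message refers to.

/* Hypothetical single-cache-line model, for illustration only. */
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

struct cache_line {
   bool    valid;    /* resident: reads can be served without a memory fetch */
   bool    dirty;    /* holds data not yet written back to memory */
   uint8_t data[64];
};

/* CLEAN (called FLUSH on a6xx): write back dirty data, keep the line. */
void cache_clean(struct cache_line *l, uint8_t memory[64])
{
   if (l->valid && l->dirty) {
      memcpy(memory, l->data, 64);
      l->dirty = false;
   }
}

/* INVALIDATE: drop the line; the next read must refetch from memory. */
void cache_invalidate(struct cache_line *l)
{
   l->valid = false;
   l->dirty = false;
}

/* a7xx FLUSH: clean and invalidate. Using this when only a clean is needed
 * forces the next read to go back to memory unnecessarily. */
void cache_flush_a7xx(struct cache_line *l, uint8_t memory[64])
{
   cache_clean(l, memory);
   cache_invalidate(l);
}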


@@ -8,6 +8,18 @@
#include "adreno_pm4.xml.h"
/* On terminology:
* - CLEAN events write dirty cache lines to memory.
* - INVALIDATE events cause subsequent reads to read the cache line from
* memory.
*
* Prior to a7xx CLEAN was instead called FLUSH. On a7xx FLUSH events do a
* clean and invalidate. We stick to the a7xx terminology in fd_gpu_event, and
* map FD_*_CLEAN events to the corresponding FLUSH events on a6xx. Note
* however that FLUSH_SO_* events, which write streamout counters to memory
* and also do a CACHE_CLEAN, haven't changed their names on a7xx.
*/
enum fd_gpu_event : uint32_t {
FD_WRITE_PRIMITIVE_COUNTS = 0,
FD_START_PRIMITIVE_CTRS,
@@ -22,13 +34,13 @@ enum fd_gpu_event : uint32_t {
FD_FLUSH_SO_1,
FD_FLUSH_SO_2,
FD_FLUSH_SO_3,
FD_CACHE_FLUSH,
FD_CACHE_CLEAN,
FD_CACHE_INVALIDATE,
FD_CCU_INVALIDATE_DEPTH,
FD_CCU_INVALIDATE_COLOR,
FD_CCU_FLUSH_BLIT_CACHE,
FD_CCU_FLUSH_DEPTH,
FD_CCU_FLUSH_COLOR,
FD_CCU_CLEAN_BLIT_CACHE,
FD_CCU_CLEAN_DEPTH,
FD_CCU_CLEAN_COLOR,
FD_LRZ_CLEAR,
FD_LRZ_FLUSH,
FD_BLIT,
@@ -60,13 +72,13 @@ constexpr inline struct fd_gpu_event_info fd_gpu_events<A6XX>[FD_GPU_EVENT_MAX]
{FLUSH_SO_1, false}, /* FD_FLUSH_SO_1 */
{FLUSH_SO_2, false}, /* FD_FLUSH_SO_2 */
{FLUSH_SO_3, false}, /* FD_FLUSH_SO_3 */
{CACHE_FLUSH_TS, true}, /* FD_CACHE_FLUSH */
{CACHE_FLUSH_TS, true}, /* FD_CACHE_CLEAN */
{CACHE_INVALIDATE, false}, /* FD_CACHE_INVALIDATE */
{PC_CCU_INVALIDATE_DEPTH, false}, /* FD_CCU_INVALIDATE_DEPTH */
{PC_CCU_INVALIDATE_COLOR, false}, /* FD_CCU_INVALIDATE_COLOR */
{PC_CCU_RESOLVE_TS, true}, /* FD_CCU_FLUSH_BLIT_CACHE */
{PC_CCU_FLUSH_DEPTH_TS, true}, /* FD_CCU_FLUSH_DEPTH */
{PC_CCU_FLUSH_COLOR_TS, true}, /* FD_CCU_FLUSH_COLOR */
{PC_CCU_RESOLVE_TS, true}, /* FD_CCU_CLEAN_BLIT_CACHE */
{PC_CCU_FLUSH_DEPTH_TS, true}, /* FD_CCU_CLEAN_DEPTH */
{PC_CCU_FLUSH_COLOR_TS, true}, /* FD_CCU_CLEAN_COLOR */
{LRZ_CLEAR, false}, /* FD_LRZ_CLEAR */
{LRZ_FLUSH, false}, /* FD_LRZ_FLUSH */
{BLIT, false}, /* FD_BLIT */
@@ -88,17 +100,17 @@ constexpr inline struct fd_gpu_event_info fd_gpu_events<A7XX>[FD_GPU_EVENT_MAX]
{FLUSH_SO_1, false}, /* FD_FLUSH_SO_1 */
{FLUSH_SO_2, false}, /* FD_FLUSH_SO_2 */
{FLUSH_SO_3, false}, /* FD_FLUSH_SO_3 */
{CACHE_FLUSH7, false}, /* FD_CACHE_FLUSH */
{CACHE_FLUSH7, false}, /* FD_CACHE_CLEAN */
{CACHE_INVALIDATE7, false}, /* FD_CACHE_INVALIDATE */
{CCU_INVALIDATE_DEPTH, false}, /* FD_CCU_INVALIDATE_DEPTH */
{CCU_INVALIDATE_COLOR, false}, /* FD_CCU_INVALIDATE_COLOR */
{CCU_RESOLVE_CLEAN, false}, /* FD_CCU_FLUSH_BLIT_CACHE */
{CCU_FLUSH_DEPTH, false}, /* FD_CCU_FLUSH_DEPTH */
{CCU_FLUSH_COLOR, false}, /* FD_CCU_FLUSH_COLOR */
{CCU_RESOLVE_CLEAN, false}, /* FD_CCU_CLEAN_BLIT_CACHE */
{CCU_FLUSH_DEPTH, false}, /* FD_CCU_CLEAN_DEPTH */
{CCU_FLUSH_COLOR, false}, /* FD_CCU_CLEAN_COLOR */
{LRZ_CLEAR, false}, /* FD_LRZ_CLEAR */
{LRZ_FLUSH, false}, /* FD_LRZ_FLUSH */
{BLIT, false}, /* FD_BLIT */
{LABEL, false}, /* FD_LABEL */
};
#endif
#endif
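
As a reading aid for the tables above: generic driver code requests an abstract FD_* event and the per-generation table maps it to the hardware event, so FD_CACHE_CLEAN becomes CACHE_FLUSH_TS on a6xx and CACHE_FLUSH7 on a7xx. The sketch below shows that lookup in isolation; the helper name and the fd_gpu_event_info field names are assumptions, and the real tu_emit_event_write<CHIP>() additionally handles the timestamp/sequence-number case indicated by the table's boolean.

/* Simplified sketch only; not the actual tu_emit_event_write<CHIP>()
 * implementation, and the fd_gpu_event_info field names are assumptions. */
template <chip CHIP>
static void
emit_gpu_event_sketch(struct tu_cs *cs, enum fd_gpu_event event)
{
   const struct fd_gpu_event_info info = fd_gpu_events<CHIP>[event];

   /* One generic request resolves to the right per-generation event. */
   tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 1);
   tu_cs_emit(cs, info.event);

   /* For entries whose boolean is set (e.g. the *_TS events on a6xx), the
    * real helper also emits a timestamp address and sequence number; that
    * part is omitted here. */
}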


@@ -1799,7 +1799,7 @@ tu6_clear_lrz(struct tu_cmd_buffer *cmd,
* LRZ via CCU. Don't need to invalidate CCU since we are presumably
* writing whole cache lines we assume to be 64 bytes.
*/
tu_emit_event_write<CHIP>(cmd, &cmd->cs, FD_CACHE_FLUSH);
tu_emit_event_write<CHIP>(cmd, &cmd->cs, FD_CACHE_CLEAN);
ops->setup(cmd, cs, PIPE_FORMAT_Z16_UNORM, PIPE_FORMAT_Z16_UNORM,
VK_IMAGE_ASPECT_DEPTH_BIT, 0, true, false,
@@ -1817,7 +1817,7 @@ tu6_clear_lrz(struct tu_cmd_buffer *cmd,
* UCHE in the earlier GRAS stage.
*/
cmd->state.cache.flush_bits |=
TU_CMD_FLAG_CCU_FLUSH_COLOR | TU_CMD_FLAG_CACHE_INVALIDATE |
TU_CMD_FLAG_CCU_CLEAN_COLOR | TU_CMD_FLAG_CACHE_INVALIDATE |
TU_CMD_FLAG_WAIT_FOR_IDLE;
}
TU_GENX(tu6_clear_lrz);
@@ -2481,7 +2481,7 @@ tu_copy_image_to_image(struct tu_cmd_buffer *cmd,
/* When executed by the user there has to be a pipeline barrier here,
* but since we're doing it manually we'll have to flush ourselves.
*/
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_FLUSH_COLOR);
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_CLEAN_COLOR);
tu_emit_event_write<CHIP>(cmd, cs, FD_CACHE_INVALIDATE);
tu_cs_emit_wfi(cs);
@@ -3461,11 +3461,11 @@ tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
* beforehand as depth should already be flushed.
*/
if (vk_format_is_depth_or_stencil(attachment->format)) {
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_FLUSH_COLOR);
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_FLUSH_DEPTH);
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_CLEAN_COLOR);
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_CLEAN_DEPTH);
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_INVALIDATE_DEPTH);
} else {
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_FLUSH_COLOR);
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_CLEAN_COLOR);
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_INVALIDATE_COLOR);
}
@@ -3840,7 +3840,7 @@ store_cp_blit(struct tu_cmd_buffer *cmd,
/* On A7XX, we need to wait for any CP_EVENT_WRITE::BLIT operations
* arising from GMEM load/clears to land before we can continue.
*/
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_FLUSH_BLIT_CACHE);
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_CLEAN_BLIT_CACHE);
/* Wait for cache event to land */
tu_cs_emit_wfi(cs);
@@ -3851,7 +3851,7 @@ store_cp_blit(struct tu_cmd_buffer *cmd,
* sysmem, and we generally assume that GMEM renderpasses leave their
* results in sysmem, so we need to flush manually here.
*/
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_FLUSH_COLOR);
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_CLEAN_COLOR);
}
template <chip CHIP>
@@ -3917,7 +3917,7 @@ store_3d_blit(struct tu_cmd_buffer *cmd,
* results in sysmem, so we need to flush manually here. The 3d blit path
* writes to depth images as a color RT, so there's no need to flush depth.
*/
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_FLUSH_COLOR);
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_CLEAN_COLOR);
/* Restore RB_BIN_CONTROL/GRAS_BIN_CONTROL saved above. */
tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1);


@@ -167,7 +167,7 @@ tu6_emit_flushes(struct tu_cmd_buffer *cmd_buffer,
cache->flush_bits = 0;
if (TU_DEBUG(FLUSHALL))
flushes |= TU_CMD_FLAG_ALL_FLUSH | TU_CMD_FLAG_ALL_INVALIDATE;
flushes |= TU_CMD_FLAG_ALL_CLEAN | TU_CMD_FLAG_ALL_INVALIDATE;
if (TU_DEBUG(SYNCDRAW))
flushes |= TU_CMD_FLAG_WAIT_MEM_WRITES |
@@ -179,18 +179,18 @@ tu6_emit_flushes(struct tu_cmd_buffer *cmd_buffer,
* any data remains that hasn't yet been made available through a barrier.
* However it does seem to work for UCHE.
*/
if (flushes & (TU_CMD_FLAG_CCU_FLUSH_COLOR |
if (flushes & (TU_CMD_FLAG_CCU_CLEAN_COLOR |
TU_CMD_FLAG_CCU_INVALIDATE_COLOR))
tu_emit_event_write<CHIP>(cmd_buffer, cs, FD_CCU_FLUSH_COLOR);
if (flushes & (TU_CMD_FLAG_CCU_FLUSH_DEPTH |
tu_emit_event_write<CHIP>(cmd_buffer, cs, FD_CCU_CLEAN_COLOR);
if (flushes & (TU_CMD_FLAG_CCU_CLEAN_DEPTH |
TU_CMD_FLAG_CCU_INVALIDATE_DEPTH))
tu_emit_event_write<CHIP>(cmd_buffer, cs, FD_CCU_FLUSH_DEPTH);
tu_emit_event_write<CHIP>(cmd_buffer, cs, FD_CCU_CLEAN_DEPTH);
if (flushes & TU_CMD_FLAG_CCU_INVALIDATE_COLOR)
tu_emit_event_write<CHIP>(cmd_buffer, cs, FD_CCU_INVALIDATE_COLOR);
if (flushes & TU_CMD_FLAG_CCU_INVALIDATE_DEPTH)
tu_emit_event_write<CHIP>(cmd_buffer, cs, FD_CCU_INVALIDATE_DEPTH);
if (flushes & TU_CMD_FLAG_CACHE_FLUSH)
tu_emit_event_write<CHIP>(cmd_buffer, cs, FD_CACHE_FLUSH);
if (flushes & TU_CMD_FLAG_CACHE_CLEAN)
tu_emit_event_write<CHIP>(cmd_buffer, cs, FD_CACHE_CLEAN);
if (flushes & TU_CMD_FLAG_CACHE_INVALIDATE)
tu_emit_event_write<CHIP>(cmd_buffer, cs, FD_CACHE_INVALIDATE);
if (flushes & TU_CMD_FLAG_BINDLESS_DESCRIPTOR_INVALIDATE) {
@@ -199,11 +199,11 @@ tu6_emit_flushes(struct tu_cmd_buffer *cmd_buffer,
.gfx_bindless = CHIP == A6XX ? 0x1f : 0xff,
));
}
if (CHIP >= A7XX && flushes & TU_CMD_FLAG_BLIT_CACHE_FLUSH)
if (CHIP >= A7XX && flushes & TU_CMD_FLAG_BLIT_CACHE_CLEAN)
/* On A7XX, blit cache flushes are required to ensure blit writes are visible
via UCHE. This isn't necessary on A6XX, all writes should be visible implicitly.
*/
tu_emit_event_write<CHIP>(cmd_buffer, cs, FD_CCU_FLUSH_BLIT_CACHE);
tu_emit_event_write<CHIP>(cmd_buffer, cs, FD_CCU_CLEAN_BLIT_CACHE);
if (CHIP >= A7XX && (flushes & TU_CMD_FLAG_CCHE_INVALIDATE) &&
/* Invalidating UCHE seems to also invalidate CCHE */
!(flushes & TU_CMD_FLAG_CACHE_INVALIDATE))
@@ -345,11 +345,11 @@ tu_emit_cache_flush_ccu(struct tu_cmd_buffer *cmd_buffer,
if (ccu_state != cmd_buffer->state.ccu_state) {
if (cmd_buffer->state.ccu_state != TU_CMD_CCU_GMEM) {
cmd_buffer->state.cache.flush_bits |=
TU_CMD_FLAG_CCU_FLUSH_COLOR |
TU_CMD_FLAG_CCU_FLUSH_DEPTH;
TU_CMD_FLAG_CCU_CLEAN_COLOR |
TU_CMD_FLAG_CCU_CLEAN_DEPTH;
cmd_buffer->state.cache.pending_flush_bits &= ~(
TU_CMD_FLAG_CCU_FLUSH_COLOR |
TU_CMD_FLAG_CCU_FLUSH_DEPTH);
TU_CMD_FLAG_CCU_CLEAN_COLOR |
TU_CMD_FLAG_CCU_CLEAN_DEPTH);
}
cmd_buffer->state.cache.flush_bits |=
TU_CMD_FLAG_CCU_INVALIDATE_COLOR |
@@ -1139,9 +1139,9 @@ tu6_emit_sysmem_resolves(struct tu_cmd_buffer *cmd,
* resolve case. However, a flush afterwards isn't needed because of the
* last sentence and the fact that we're in sysmem mode.
*/
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_FLUSH_COLOR);
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_CLEAN_COLOR);
if (subpass->resolve_depth_stencil)
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_FLUSH_DEPTH);
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_CLEAN_DEPTH);
tu_emit_event_write<CHIP>(cmd, cs, FD_CACHE_INVALIDATE);
@@ -1597,7 +1597,7 @@ tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
* emit_vsc_overflow_test) or the VSC_DATA buffer directly (implicitly as
* part of draws).
*/
tu_emit_event_write<CHIP>(cmd, cs, FD_CACHE_FLUSH);
tu_emit_event_write<CHIP>(cmd, cs, FD_CACHE_CLEAN);
tu_cs_emit_wfi(cs);
@@ -2059,7 +2059,7 @@ tu6_tile_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
tu_lrz_tiling_end<CHIP>(cmd, cs);
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_FLUSH_BLIT_CACHE);
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_CLEAN_BLIT_CACHE);
tu_cs_sanity_check(cs);
}
@@ -3239,14 +3239,14 @@ tu_CmdPushConstants2KHR(VkCommandBuffer commandBuffer,
cmd->state.dirty |= TU_CMD_DIRTY_SHADER_CONSTS;
}
/* Flush everything which has been made available but we haven't actually
* flushed yet.
/* Clean everything which has been made available but we haven't actually
* cleaned yet.
*/
static void
tu_flush_all_pending(struct tu_cache_state *cache)
tu_clean_all_pending(struct tu_cache_state *cache)
{
cache->flush_bits |= cache->pending_flush_bits & TU_CMD_FLAG_ALL_FLUSH;
cache->pending_flush_bits &= ~TU_CMD_FLAG_ALL_FLUSH;
cache->flush_bits |= cache->pending_flush_bits & TU_CMD_FLAG_ALL_CLEAN;
cache->pending_flush_bits &= ~TU_CMD_FLAG_ALL_CLEAN;
}
template <chip CHIP>
@@ -3268,15 +3268,15 @@ tu_EndCommandBuffer(VkCommandBuffer commandBuffer)
* command buffers there wouldn't be any unnecessary flushes in between.
*/
if (cmd_buffer->state.pass) {
tu_flush_all_pending(&cmd_buffer->state.renderpass_cache);
tu_clean_all_pending(&cmd_buffer->state.renderpass_cache);
tu_emit_cache_flush_renderpass<CHIP>(cmd_buffer);
trace_end_cmd_buffer(&cmd_buffer->trace, &cmd_buffer->draw_cs);
} else {
tu_flush_all_pending(&cmd_buffer->state.cache);
tu_clean_all_pending(&cmd_buffer->state.cache);
cmd_buffer->state.cache.flush_bits |=
TU_CMD_FLAG_CCU_FLUSH_COLOR |
TU_CMD_FLAG_CCU_FLUSH_DEPTH;
TU_CMD_FLAG_CCU_CLEAN_COLOR |
TU_CMD_FLAG_CCU_CLEAN_DEPTH;
tu_emit_cache_flush<CHIP>(cmd_buffer);
trace_end_cmd_buffer(&cmd_buffer->trace, &cmd_buffer->cs);
@@ -3520,27 +3520,27 @@ tu_flush_for_access(struct tu_cache_state *cache,
TU_CMD_FLAG_ALL_INVALIDATE;
}
#define SRC_FLUSH(domain, flush, invalidate) \
#define SRC_FLUSH(domain, clean, invalidate) \
if (src_mask & TU_ACCESS_##domain##_WRITE) { \
cache->pending_flush_bits |= TU_CMD_FLAG_##flush | \
cache->pending_flush_bits |= TU_CMD_FLAG_##clean | \
(TU_CMD_FLAG_ALL_INVALIDATE & ~TU_CMD_FLAG_##invalidate); \
}
SRC_FLUSH(UCHE, CACHE_FLUSH, CACHE_INVALIDATE)
SRC_FLUSH(CCU_COLOR, CCU_FLUSH_COLOR, CCU_INVALIDATE_COLOR)
SRC_FLUSH(CCU_DEPTH, CCU_FLUSH_DEPTH, CCU_INVALIDATE_DEPTH)
SRC_FLUSH(UCHE, CACHE_CLEAN, CACHE_INVALIDATE)
SRC_FLUSH(CCU_COLOR, CCU_CLEAN_COLOR, CCU_INVALIDATE_COLOR)
SRC_FLUSH(CCU_DEPTH, CCU_CLEAN_DEPTH, CCU_INVALIDATE_DEPTH)
#undef SRC_FLUSH
#define SRC_INCOHERENT_FLUSH(domain, flush, invalidate) \
#define SRC_INCOHERENT_FLUSH(domain, clean, invalidate) \
if (src_mask & TU_ACCESS_##domain##_INCOHERENT_WRITE) { \
flush_bits |= TU_CMD_FLAG_##flush; \
flush_bits |= TU_CMD_FLAG_##clean; \
cache->pending_flush_bits |= \
(TU_CMD_FLAG_ALL_INVALIDATE & ~TU_CMD_FLAG_##invalidate); \
}
SRC_INCOHERENT_FLUSH(CCU_COLOR, CCU_FLUSH_COLOR, CCU_INVALIDATE_COLOR)
SRC_INCOHERENT_FLUSH(CCU_DEPTH, CCU_FLUSH_DEPTH, CCU_INVALIDATE_DEPTH)
SRC_INCOHERENT_FLUSH(CCU_COLOR, CCU_CLEAN_COLOR, CCU_INVALIDATE_COLOR)
SRC_INCOHERENT_FLUSH(CCU_DEPTH, CCU_CLEAN_DEPTH, CCU_INVALIDATE_DEPTH)
#undef SRC_INCOHERENT_FLUSH
@@ -3548,20 +3548,20 @@ tu_flush_for_access(struct tu_cache_state *cache,
* drains the queue before signalling completion to the host.
*/
if (dst_mask & (TU_ACCESS_SYSMEM_READ | TU_ACCESS_SYSMEM_WRITE)) {
flush_bits |= cache->pending_flush_bits & TU_CMD_FLAG_ALL_FLUSH;
flush_bits |= cache->pending_flush_bits & TU_CMD_FLAG_ALL_CLEAN;
}
#define DST_FLUSH(domain, flush, invalidate) \
#define DST_FLUSH(domain, clean, invalidate) \
if (dst_mask & (TU_ACCESS_##domain##_READ | \
TU_ACCESS_##domain##_WRITE)) { \
flush_bits |= cache->pending_flush_bits & \
(TU_CMD_FLAG_##invalidate | \
(TU_CMD_FLAG_ALL_FLUSH & ~TU_CMD_FLAG_##flush)); \
(TU_CMD_FLAG_ALL_CLEAN & ~TU_CMD_FLAG_##clean)); \
}
DST_FLUSH(UCHE, CACHE_FLUSH, CACHE_INVALIDATE)
DST_FLUSH(CCU_COLOR, CCU_FLUSH_COLOR, CCU_INVALIDATE_COLOR)
DST_FLUSH(CCU_DEPTH, CCU_FLUSH_DEPTH, CCU_INVALIDATE_DEPTH)
DST_FLUSH(UCHE, CACHE_CLEAN, CACHE_INVALIDATE)
DST_FLUSH(CCU_COLOR, CCU_CLEAN_COLOR, CCU_INVALIDATE_COLOR)
DST_FLUSH(CCU_DEPTH, CCU_CLEAN_DEPTH, CCU_INVALIDATE_DEPTH)
#undef DST_FLUSH
@@ -3570,11 +3570,11 @@ tu_flush_for_access(struct tu_cache_state *cache,
TU_ACCESS_##domain##_INCOHERENT_WRITE)) { \
flush_bits |= TU_CMD_FLAG_##invalidate | \
(cache->pending_flush_bits & \
(TU_CMD_FLAG_ALL_FLUSH & ~TU_CMD_FLAG_##flush)); \
(TU_CMD_FLAG_ALL_CLEAN & ~TU_CMD_FLAG_##flush)); \
}
DST_INCOHERENT_FLUSH(CCU_COLOR, CCU_FLUSH_COLOR, CCU_INVALIDATE_COLOR)
DST_INCOHERENT_FLUSH(CCU_DEPTH, CCU_FLUSH_DEPTH, CCU_INVALIDATE_DEPTH)
DST_INCOHERENT_FLUSH(CCU_COLOR, CCU_CLEAN_COLOR, CCU_INVALIDATE_COLOR)
DST_INCOHERENT_FLUSH(CCU_DEPTH, CCU_CLEAN_DEPTH, CCU_INVALIDATE_DEPTH)
if (dst_mask & TU_ACCESS_BINDLESS_DESCRIPTOR_READ) {
flush_bits |= TU_CMD_FLAG_BINDLESS_DESCRIPTOR_INVALIDATE;
@@ -3589,7 +3589,7 @@ tu_flush_for_access(struct tu_cache_state *cache,
/* The blit cache is a special case dependency between CP_EVENT_WRITE::BLIT
* (from GMEM loads/clears) to any GMEM attachment reads done via the UCHE
* (Eg: Input attachments/CP_BLIT) which needs an explicit BLIT_CACHE_FLUSH
* (Eg: Input attachments/CP_BLIT) which needs an explicit BLIT_CACHE_CLEAN
* for the event blit writes to land, it has the following properties:
* - Set on reads rather than on writes, like flushes.
* - Not executed automatically if pending, like invalidates.
@@ -3597,12 +3597,12 @@ tu_flush_for_access(struct tu_cache_state *cache,
* continuing the render pass.
*/
if (src_mask & TU_ACCESS_BLIT_WRITE_GMEM) {
cache->pending_flush_bits |= TU_CMD_FLAG_BLIT_CACHE_FLUSH;
cache->pending_flush_bits |= TU_CMD_FLAG_BLIT_CACHE_CLEAN;
}
if ((dst_mask & TU_ACCESS_UCHE_READ_GMEM) &&
(cache->pending_flush_bits & TU_CMD_FLAG_BLIT_CACHE_FLUSH)) {
flush_bits |= TU_CMD_FLAG_BLIT_CACHE_FLUSH;
(cache->pending_flush_bits & TU_CMD_FLAG_BLIT_CACHE_CLEAN)) {
flush_bits |= TU_CMD_FLAG_BLIT_CACHE_CLEAN;
}
#undef DST_INCOHERENT_FLUSH
@@ -4057,10 +4057,10 @@ tu_CmdExecuteCommands(VkCommandBuffer commandBuffer,
/* Emit any pending flushes. */
if (cmd->state.pass) {
tu_flush_all_pending(&cmd->state.renderpass_cache);
tu_clean_all_pending(&cmd->state.renderpass_cache);
TU_CALLX(cmd->device, tu_emit_cache_flush_renderpass)(cmd);
} else {
tu_flush_all_pending(&cmd->state.cache);
tu_clean_all_pending(&cmd->state.cache);
TU_CALLX(cmd->device, tu_emit_cache_flush)(cmd);
}
@@ -4221,7 +4221,7 @@ tu_CmdExecuteCommands(VkCommandBuffer commandBuffer,
if (cmd->state.pass) {
struct tu_cache_state *cache = &cmd->state.renderpass_cache;
BITMASK_ENUM(tu_cmd_flush_bits) retained_pending_flush_bits =
cache->pending_flush_bits & TU_CMD_FLAG_BLIT_CACHE_FLUSH;
cache->pending_flush_bits & TU_CMD_FLAG_BLIT_CACHE_CLEAN;
tu_cache_init(cache);
cache->pending_flush_bits |= retained_pending_flush_bits;
} else {
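
A rough mental model of the flush_bits / pending_flush_bits bookkeeping changed above (a simplification that uses this patch's flag names, but the helper names below are invented for illustration): a write only records what would need cleaning or invalidating in pending_flush_bits, and a later access that actually depends on the data promotes the relevant pending bits into flush_bits, which tu6_emit_flushes() then turns into events.

/* Illustration only: a CCU color write followed by a UCHE read, mirroring
 * the SRC_FLUSH/DST_FLUSH macros in tu_flush_for_access(). */
static void
record_ccu_color_write(struct tu_cache_state *cache)
{
   /* The data is merely "available": remember the clean, plus the
    * invalidates every other cache would need, without emitting anything. */
   cache->pending_flush_bits |= TU_CMD_FLAG_CCU_CLEAN_COLOR |
      (TU_CMD_FLAG_ALL_INVALIDATE & ~TU_CMD_FLAG_CCU_INVALIDATE_COLOR);
}

static void
record_uche_read(struct tu_cache_state *cache)
{
   /* The read makes the data "visible": promote the pending CCU clean and
    * the UCHE invalidate into flush_bits, so the next tu6_emit_flushes()
    * emits FD_CCU_CLEAN_COLOR and FD_CACHE_INVALIDATE. */
   cache->flush_bits |= cache->pending_flush_bits &
      (TU_CMD_FLAG_CACHE_INVALIDATE |
       (TU_CMD_FLAG_ALL_CLEAN & ~TU_CMD_FLAG_CACHE_CLEAN));
}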


@@ -195,11 +195,11 @@ enum tu_stage {
};
enum tu_cmd_flush_bits {
TU_CMD_FLAG_CCU_FLUSH_DEPTH = 1 << 0,
TU_CMD_FLAG_CCU_FLUSH_COLOR = 1 << 1,
TU_CMD_FLAG_CCU_CLEAN_DEPTH = 1 << 0,
TU_CMD_FLAG_CCU_CLEAN_COLOR = 1 << 1,
TU_CMD_FLAG_CCU_INVALIDATE_DEPTH = 1 << 2,
TU_CMD_FLAG_CCU_INVALIDATE_COLOR = 1 << 3,
TU_CMD_FLAG_CACHE_FLUSH = 1 << 4,
TU_CMD_FLAG_CACHE_CLEAN = 1 << 4,
TU_CMD_FLAG_CACHE_INVALIDATE = 1 << 5,
TU_CMD_FLAG_CCHE_INVALIDATE = 1 << 6,
TU_CMD_FLAG_WAIT_MEM_WRITES = 1 << 7,
@@ -209,12 +209,12 @@ enum tu_cmd_flush_bits {
/* This is an unusual flush that isn't automatically executed if pending,
* as it isn't necessary. Therefore, it's not included in ALL_FLUSH.
*/
TU_CMD_FLAG_BLIT_CACHE_FLUSH = 1 << 11,
TU_CMD_FLAG_BLIT_CACHE_CLEAN = 1 << 11,
TU_CMD_FLAG_ALL_FLUSH =
TU_CMD_FLAG_CCU_FLUSH_DEPTH |
TU_CMD_FLAG_CCU_FLUSH_COLOR |
TU_CMD_FLAG_CACHE_FLUSH |
TU_CMD_FLAG_ALL_CLEAN =
TU_CMD_FLAG_CCU_CLEAN_DEPTH |
TU_CMD_FLAG_CCU_CLEAN_COLOR |
TU_CMD_FLAG_CACHE_CLEAN |
/* Treat the CP as a sort of "cache" which may need to be "flushed" via
* waiting for writes to land with WAIT_FOR_MEM_WRITES.
*/


@@ -1481,7 +1481,7 @@ emit_end_xfb_query(struct tu_cmd_buffer *cmdbuf,
tu_emit_event_write<CHIP>(cmdbuf, cs, FD_WRITE_PRIMITIVE_COUNTS);
tu_cs_emit_wfi(cs);
tu_emit_event_write<CHIP>(cmdbuf, cs, FD_CACHE_FLUSH);
tu_emit_event_write<CHIP>(cmdbuf, cs, FD_CACHE_CLEAN);
/* Set the count of written primitives */
tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 9);
@@ -1492,7 +1492,7 @@ emit_end_xfb_query(struct tu_cmd_buffer *cmdbuf,
tu_cs_emit_qw(cs, end_written_iova);
tu_cs_emit_qw(cs, begin_written_iova);
tu_emit_event_write<CHIP>(cmdbuf, cs, FD_CACHE_FLUSH);
tu_emit_event_write<CHIP>(cmdbuf, cs, FD_CACHE_CLEAN);
/* Set the count of generated primitives */
tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 9);