mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
tu: Use a7xx terminology for flushes
a7xx renamed events around flushing. The mapping from a6xx to a7xx event names is: FLUSH → CLEAN, INVALIDATE → INVALIDATE, FLUSH+INVALIDATE → FLUSH. The FLUSH events kept their name, but now they also invalidate. By not adopting the new CLEAN events, we're inadvertently invalidating too much. This change is just a refactor that makes generic code consistently use the a7xx terminology. The next commit will actually make us use CLEAN. Note that LRZ_FLUSH is deliberately not changed because it actually also invalidates (and the real name on a6xx was FLUSH_AND_INVALIDATE). Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29824>
This commit is contained in:
parent
0e220cd45a
commit
c7284c94ef
5 changed files with 95 additions and 83 deletions
|
|
@ -8,6 +8,18 @@
|
|||
|
||||
#include "adreno_pm4.xml.h"
|
||||
|
||||
/* On terminology:
|
||||
* - CLEAN events write dirty cache lines to memory.
|
||||
* - INVALIDATE events cause subsequent reads to read the cache line from
|
||||
* memory.
|
||||
*
|
||||
* Prior to a7xx CLEAN was instead called FLUSH. On a7xx FLUSH events do a
|
||||
* clean and invalidate. We stick to the a7xx terminology in fd_gpu_event, and
|
||||
* map FD_*_CLEAN events to the corresponding FLUSH events on a6xx. Note
|
||||
* however that FLUSH_SO_* events, which write streamout counters to memory
|
||||
* and also do a CACHE_CLEAN, haven't changed their names on a7xx.
|
||||
*/
|
||||
|
||||
enum fd_gpu_event : uint32_t {
|
||||
FD_WRITE_PRIMITIVE_COUNTS = 0,
|
||||
FD_START_PRIMITIVE_CTRS,
|
||||
|
|
@ -22,13 +34,13 @@ enum fd_gpu_event : uint32_t {
|
|||
FD_FLUSH_SO_1,
|
||||
FD_FLUSH_SO_2,
|
||||
FD_FLUSH_SO_3,
|
||||
FD_CACHE_FLUSH,
|
||||
FD_CACHE_CLEAN,
|
||||
FD_CACHE_INVALIDATE,
|
||||
FD_CCU_INVALIDATE_DEPTH,
|
||||
FD_CCU_INVALIDATE_COLOR,
|
||||
FD_CCU_FLUSH_BLIT_CACHE,
|
||||
FD_CCU_FLUSH_DEPTH,
|
||||
FD_CCU_FLUSH_COLOR,
|
||||
FD_CCU_CLEAN_BLIT_CACHE,
|
||||
FD_CCU_CLEAN_DEPTH,
|
||||
FD_CCU_CLEAN_COLOR,
|
||||
FD_LRZ_CLEAR,
|
||||
FD_LRZ_FLUSH,
|
||||
FD_BLIT,
|
||||
|
|
@ -60,13 +72,13 @@ constexpr inline struct fd_gpu_event_info fd_gpu_events<A6XX>[FD_GPU_EVENT_MAX]
|
|||
{FLUSH_SO_1, false}, /* FD_FLUSH_SO_1 */
|
||||
{FLUSH_SO_2, false}, /* FD_FLUSH_SO_2 */
|
||||
{FLUSH_SO_3, false}, /* FD_FLUSH_SO_3 */
|
||||
{CACHE_FLUSH_TS, true}, /* FD_CACHE_FLUSH */
|
||||
{CACHE_FLUSH_TS, true}, /* FD_CACHE_CLEAN */
|
||||
{CACHE_INVALIDATE, false}, /* FD_CACHE_INVALIDATE */
|
||||
{PC_CCU_INVALIDATE_DEPTH, false}, /* FD_CCU_INVALIDATE_DEPTH */
|
||||
{PC_CCU_INVALIDATE_COLOR, false}, /* FD_CCU_INVALIDATE_COLOR */
|
||||
{PC_CCU_RESOLVE_TS, true}, /* FD_CCU_FLUSH_BLIT_CACHE */
|
||||
{PC_CCU_FLUSH_DEPTH_TS, true}, /* FD_CCU_FLUSH_DEPTH */
|
||||
{PC_CCU_FLUSH_COLOR_TS, true}, /* FD_CCU_FLUSH_COLOR */
|
||||
{PC_CCU_RESOLVE_TS, true}, /* FD_CCU_CLEAN_BLIT_CACHE */
|
||||
{PC_CCU_FLUSH_DEPTH_TS, true}, /* FD_CCU_CLEAN_DEPTH */
|
||||
{PC_CCU_FLUSH_COLOR_TS, true}, /* FD_CCU_CLEAN_COLOR */
|
||||
{LRZ_CLEAR, false}, /* FD_LRZ_CLEAR */
|
||||
{LRZ_FLUSH, false}, /* FD_LRZ_FLUSH */
|
||||
{BLIT, false}, /* FD_BLIT */
|
||||
|
|
@ -88,17 +100,17 @@ constexpr inline struct fd_gpu_event_info fd_gpu_events<A7XX>[FD_GPU_EVENT_MAX]
|
|||
{FLUSH_SO_1, false}, /* FD_FLUSH_SO_1 */
|
||||
{FLUSH_SO_2, false}, /* FD_FLUSH_SO_2 */
|
||||
{FLUSH_SO_3, false}, /* FD_FLUSH_SO_3 */
|
||||
{CACHE_FLUSH7, false}, /* FD_CACHE_FLUSH */
|
||||
{CACHE_FLUSH7, false}, /* FD_CACHE_CLEAN */
|
||||
{CACHE_INVALIDATE7, false}, /* FD_CACHE_INVALIDATE */
|
||||
{CCU_INVALIDATE_DEPTH, false}, /* FD_CCU_INVALIDATE_DEPTH */
|
||||
{CCU_INVALIDATE_COLOR, false}, /* FD_CCU_INVALIDATE_COLOR */
|
||||
{CCU_RESOLVE_CLEAN, false}, /* FD_CCU_FLUSH_BLIT_CACHE */
|
||||
{CCU_FLUSH_DEPTH, false}, /* FD_CCU_FLUSH_DEPTH */
|
||||
{CCU_FLUSH_COLOR, false}, /* FD_CCU_FLUSH_COLOR */
|
||||
{CCU_RESOLVE_CLEAN, false}, /* FD_CCU_CLEAN_BLIT_CACHE */
|
||||
{CCU_FLUSH_DEPTH, false}, /* FD_CCU_CLEAN_DEPTH */
|
||||
{CCU_FLUSH_COLOR, false}, /* FD_CCU_CLEAN_COLOR */
|
||||
{LRZ_CLEAR, false}, /* FD_LRZ_CLEAR */
|
||||
{LRZ_FLUSH, false}, /* FD_LRZ_FLUSH */
|
||||
{BLIT, false}, /* FD_BLIT */
|
||||
{LABEL, false}, /* FD_LABEL */
|
||||
};
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -1799,7 +1799,7 @@ tu6_clear_lrz(struct tu_cmd_buffer *cmd,
|
|||
* LRZ via CCU. Don't need to invalidate CCU since we are presumably
|
||||
* writing whole cache lines we assume to be 64 bytes.
|
||||
*/
|
||||
tu_emit_event_write<CHIP>(cmd, &cmd->cs, FD_CACHE_FLUSH);
|
||||
tu_emit_event_write<CHIP>(cmd, &cmd->cs, FD_CACHE_CLEAN);
|
||||
|
||||
ops->setup(cmd, cs, PIPE_FORMAT_Z16_UNORM, PIPE_FORMAT_Z16_UNORM,
|
||||
VK_IMAGE_ASPECT_DEPTH_BIT, 0, true, false,
|
||||
|
|
@ -1817,7 +1817,7 @@ tu6_clear_lrz(struct tu_cmd_buffer *cmd,
|
|||
* UCHE in the earlier GRAS stage.
|
||||
*/
|
||||
cmd->state.cache.flush_bits |=
|
||||
TU_CMD_FLAG_CCU_FLUSH_COLOR | TU_CMD_FLAG_CACHE_INVALIDATE |
|
||||
TU_CMD_FLAG_CCU_CLEAN_COLOR | TU_CMD_FLAG_CACHE_INVALIDATE |
|
||||
TU_CMD_FLAG_WAIT_FOR_IDLE;
|
||||
}
|
||||
TU_GENX(tu6_clear_lrz);
|
||||
|
|
@ -2481,7 +2481,7 @@ tu_copy_image_to_image(struct tu_cmd_buffer *cmd,
|
|||
/* When executed by the user there has to be a pipeline barrier here,
|
||||
* but since we're doing it manually we'll have to flush ourselves.
|
||||
*/
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_FLUSH_COLOR);
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_CLEAN_COLOR);
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CACHE_INVALIDATE);
|
||||
tu_cs_emit_wfi(cs);
|
||||
|
||||
|
|
@ -3461,11 +3461,11 @@ tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
* beforehand as depth should already be flushed.
|
||||
*/
|
||||
if (vk_format_is_depth_or_stencil(attachment->format)) {
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_FLUSH_COLOR);
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_FLUSH_DEPTH);
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_CLEAN_COLOR);
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_CLEAN_DEPTH);
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_INVALIDATE_DEPTH);
|
||||
} else {
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_FLUSH_COLOR);
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_CLEAN_COLOR);
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_INVALIDATE_COLOR);
|
||||
}
|
||||
|
||||
|
|
@ -3840,7 +3840,7 @@ store_cp_blit(struct tu_cmd_buffer *cmd,
|
|||
/* On A7XX, we need to wait for any CP_EVENT_WRITE::BLIT operations
|
||||
* arising from GMEM load/clears to land before we can continue.
|
||||
*/
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_FLUSH_BLIT_CACHE);
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_CLEAN_BLIT_CACHE);
|
||||
|
||||
/* Wait for cache event to land */
|
||||
tu_cs_emit_wfi(cs);
|
||||
|
|
@ -3851,7 +3851,7 @@ store_cp_blit(struct tu_cmd_buffer *cmd,
|
|||
* sysmem, and we generally assume that GMEM renderpasses leave their
|
||||
* results in sysmem, so we need to flush manually here.
|
||||
*/
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_FLUSH_COLOR);
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_CLEAN_COLOR);
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
|
|
@ -3917,7 +3917,7 @@ store_3d_blit(struct tu_cmd_buffer *cmd,
|
|||
* results in sysmem, so we need to flush manually here. The 3d blit path
|
||||
* writes to depth images as a color RT, so there's no need to flush depth.
|
||||
*/
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_FLUSH_COLOR);
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_CLEAN_COLOR);
|
||||
|
||||
/* Restore RB_BIN_CONTROL/GRAS_BIN_CONTROL saved above. */
|
||||
tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1);
|
||||
|
|
|
|||
|
|
@ -167,7 +167,7 @@ tu6_emit_flushes(struct tu_cmd_buffer *cmd_buffer,
|
|||
cache->flush_bits = 0;
|
||||
|
||||
if (TU_DEBUG(FLUSHALL))
|
||||
flushes |= TU_CMD_FLAG_ALL_FLUSH | TU_CMD_FLAG_ALL_INVALIDATE;
|
||||
flushes |= TU_CMD_FLAG_ALL_CLEAN | TU_CMD_FLAG_ALL_INVALIDATE;
|
||||
|
||||
if (TU_DEBUG(SYNCDRAW))
|
||||
flushes |= TU_CMD_FLAG_WAIT_MEM_WRITES |
|
||||
|
|
@ -179,18 +179,18 @@ tu6_emit_flushes(struct tu_cmd_buffer *cmd_buffer,
|
|||
* any data remains that hasn't yet been made available through a barrier.
|
||||
* However it does seem to work for UCHE.
|
||||
*/
|
||||
if (flushes & (TU_CMD_FLAG_CCU_FLUSH_COLOR |
|
||||
if (flushes & (TU_CMD_FLAG_CCU_CLEAN_COLOR |
|
||||
TU_CMD_FLAG_CCU_INVALIDATE_COLOR))
|
||||
tu_emit_event_write<CHIP>(cmd_buffer, cs, FD_CCU_FLUSH_COLOR);
|
||||
if (flushes & (TU_CMD_FLAG_CCU_FLUSH_DEPTH |
|
||||
tu_emit_event_write<CHIP>(cmd_buffer, cs, FD_CCU_CLEAN_COLOR);
|
||||
if (flushes & (TU_CMD_FLAG_CCU_CLEAN_DEPTH |
|
||||
TU_CMD_FLAG_CCU_INVALIDATE_DEPTH))
|
||||
tu_emit_event_write<CHIP>(cmd_buffer, cs, FD_CCU_FLUSH_DEPTH);
|
||||
tu_emit_event_write<CHIP>(cmd_buffer, cs, FD_CCU_CLEAN_DEPTH);
|
||||
if (flushes & TU_CMD_FLAG_CCU_INVALIDATE_COLOR)
|
||||
tu_emit_event_write<CHIP>(cmd_buffer, cs, FD_CCU_INVALIDATE_COLOR);
|
||||
if (flushes & TU_CMD_FLAG_CCU_INVALIDATE_DEPTH)
|
||||
tu_emit_event_write<CHIP>(cmd_buffer, cs, FD_CCU_INVALIDATE_DEPTH);
|
||||
if (flushes & TU_CMD_FLAG_CACHE_FLUSH)
|
||||
tu_emit_event_write<CHIP>(cmd_buffer, cs, FD_CACHE_FLUSH);
|
||||
if (flushes & TU_CMD_FLAG_CACHE_CLEAN)
|
||||
tu_emit_event_write<CHIP>(cmd_buffer, cs, FD_CACHE_CLEAN);
|
||||
if (flushes & TU_CMD_FLAG_CACHE_INVALIDATE)
|
||||
tu_emit_event_write<CHIP>(cmd_buffer, cs, FD_CACHE_INVALIDATE);
|
||||
if (flushes & TU_CMD_FLAG_BINDLESS_DESCRIPTOR_INVALIDATE) {
|
||||
|
|
@ -199,11 +199,11 @@ tu6_emit_flushes(struct tu_cmd_buffer *cmd_buffer,
|
|||
.gfx_bindless = CHIP == A6XX ? 0x1f : 0xff,
|
||||
));
|
||||
}
|
||||
if (CHIP >= A7XX && flushes & TU_CMD_FLAG_BLIT_CACHE_FLUSH)
|
||||
if (CHIP >= A7XX && flushes & TU_CMD_FLAG_BLIT_CACHE_CLEAN)
|
||||
/* On A7XX, blit cache flushes are required to ensure blit writes are visible
|
||||
* via UCHE. This isn't necessary on A6XX, all writes should be visible implicitly.
|
||||
*/
|
||||
tu_emit_event_write<CHIP>(cmd_buffer, cs, FD_CCU_FLUSH_BLIT_CACHE);
|
||||
tu_emit_event_write<CHIP>(cmd_buffer, cs, FD_CCU_CLEAN_BLIT_CACHE);
|
||||
if (CHIP >= A7XX && (flushes & TU_CMD_FLAG_CCHE_INVALIDATE) &&
|
||||
/* Invalidating UCHE seems to also invalidate CCHE */
|
||||
!(flushes & TU_CMD_FLAG_CACHE_INVALIDATE))
|
||||
|
|
@ -345,11 +345,11 @@ tu_emit_cache_flush_ccu(struct tu_cmd_buffer *cmd_buffer,
|
|||
if (ccu_state != cmd_buffer->state.ccu_state) {
|
||||
if (cmd_buffer->state.ccu_state != TU_CMD_CCU_GMEM) {
|
||||
cmd_buffer->state.cache.flush_bits |=
|
||||
TU_CMD_FLAG_CCU_FLUSH_COLOR |
|
||||
TU_CMD_FLAG_CCU_FLUSH_DEPTH;
|
||||
TU_CMD_FLAG_CCU_CLEAN_COLOR |
|
||||
TU_CMD_FLAG_CCU_CLEAN_DEPTH;
|
||||
cmd_buffer->state.cache.pending_flush_bits &= ~(
|
||||
TU_CMD_FLAG_CCU_FLUSH_COLOR |
|
||||
TU_CMD_FLAG_CCU_FLUSH_DEPTH);
|
||||
TU_CMD_FLAG_CCU_CLEAN_COLOR |
|
||||
TU_CMD_FLAG_CCU_CLEAN_DEPTH);
|
||||
}
|
||||
cmd_buffer->state.cache.flush_bits |=
|
||||
TU_CMD_FLAG_CCU_INVALIDATE_COLOR |
|
||||
|
|
@ -1139,9 +1139,9 @@ tu6_emit_sysmem_resolves(struct tu_cmd_buffer *cmd,
|
|||
* resolve case. However, a flush afterwards isn't needed because of the
|
||||
* last sentence and the fact that we're in sysmem mode.
|
||||
*/
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_FLUSH_COLOR);
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_CLEAN_COLOR);
|
||||
if (subpass->resolve_depth_stencil)
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_FLUSH_DEPTH);
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_CLEAN_DEPTH);
|
||||
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CACHE_INVALIDATE);
|
||||
|
||||
|
|
@ -1597,7 +1597,7 @@ tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
* emit_vsc_overflow_test) or the VSC_DATA buffer directly (implicitly as
|
||||
* part of draws).
|
||||
*/
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CACHE_FLUSH);
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CACHE_CLEAN);
|
||||
|
||||
tu_cs_emit_wfi(cs);
|
||||
|
||||
|
|
@ -2059,7 +2059,7 @@ tu6_tile_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
|||
|
||||
tu_lrz_tiling_end<CHIP>(cmd, cs);
|
||||
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_FLUSH_BLIT_CACHE);
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_CLEAN_BLIT_CACHE);
|
||||
|
||||
tu_cs_sanity_check(cs);
|
||||
}
|
||||
|
|
@ -3239,14 +3239,14 @@ tu_CmdPushConstants2KHR(VkCommandBuffer commandBuffer,
|
|||
cmd->state.dirty |= TU_CMD_DIRTY_SHADER_CONSTS;
|
||||
}
|
||||
|
||||
/* Flush everything which has been made available but we haven't actually
|
||||
* flushed yet.
|
||||
/* Clean everything which has been made available but we haven't actually
|
||||
* cleaned yet.
|
||||
*/
|
||||
static void
|
||||
tu_flush_all_pending(struct tu_cache_state *cache)
|
||||
tu_clean_all_pending(struct tu_cache_state *cache)
|
||||
{
|
||||
cache->flush_bits |= cache->pending_flush_bits & TU_CMD_FLAG_ALL_FLUSH;
|
||||
cache->pending_flush_bits &= ~TU_CMD_FLAG_ALL_FLUSH;
|
||||
cache->flush_bits |= cache->pending_flush_bits & TU_CMD_FLAG_ALL_CLEAN;
|
||||
cache->pending_flush_bits &= ~TU_CMD_FLAG_ALL_CLEAN;
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
|
|
@ -3268,15 +3268,15 @@ tu_EndCommandBuffer(VkCommandBuffer commandBuffer)
|
|||
* command buffers there wouldn't be any unnecessary flushes in between.
|
||||
*/
|
||||
if (cmd_buffer->state.pass) {
|
||||
tu_flush_all_pending(&cmd_buffer->state.renderpass_cache);
|
||||
tu_clean_all_pending(&cmd_buffer->state.renderpass_cache);
|
||||
tu_emit_cache_flush_renderpass<CHIP>(cmd_buffer);
|
||||
|
||||
trace_end_cmd_buffer(&cmd_buffer->trace, &cmd_buffer->draw_cs);
|
||||
} else {
|
||||
tu_flush_all_pending(&cmd_buffer->state.cache);
|
||||
tu_clean_all_pending(&cmd_buffer->state.cache);
|
||||
cmd_buffer->state.cache.flush_bits |=
|
||||
TU_CMD_FLAG_CCU_FLUSH_COLOR |
|
||||
TU_CMD_FLAG_CCU_FLUSH_DEPTH;
|
||||
TU_CMD_FLAG_CCU_CLEAN_COLOR |
|
||||
TU_CMD_FLAG_CCU_CLEAN_DEPTH;
|
||||
tu_emit_cache_flush<CHIP>(cmd_buffer);
|
||||
|
||||
trace_end_cmd_buffer(&cmd_buffer->trace, &cmd_buffer->cs);
|
||||
|
|
@ -3520,27 +3520,27 @@ tu_flush_for_access(struct tu_cache_state *cache,
|
|||
TU_CMD_FLAG_ALL_INVALIDATE;
|
||||
}
|
||||
|
||||
#define SRC_FLUSH(domain, flush, invalidate) \
|
||||
#define SRC_FLUSH(domain, clean, invalidate) \
|
||||
if (src_mask & TU_ACCESS_##domain##_WRITE) { \
|
||||
cache->pending_flush_bits |= TU_CMD_FLAG_##flush | \
|
||||
cache->pending_flush_bits |= TU_CMD_FLAG_##clean | \
|
||||
(TU_CMD_FLAG_ALL_INVALIDATE & ~TU_CMD_FLAG_##invalidate); \
|
||||
}
|
||||
|
||||
SRC_FLUSH(UCHE, CACHE_FLUSH, CACHE_INVALIDATE)
|
||||
SRC_FLUSH(CCU_COLOR, CCU_FLUSH_COLOR, CCU_INVALIDATE_COLOR)
|
||||
SRC_FLUSH(CCU_DEPTH, CCU_FLUSH_DEPTH, CCU_INVALIDATE_DEPTH)
|
||||
SRC_FLUSH(UCHE, CACHE_CLEAN, CACHE_INVALIDATE)
|
||||
SRC_FLUSH(CCU_COLOR, CCU_CLEAN_COLOR, CCU_INVALIDATE_COLOR)
|
||||
SRC_FLUSH(CCU_DEPTH, CCU_CLEAN_DEPTH, CCU_INVALIDATE_DEPTH)
|
||||
|
||||
#undef SRC_FLUSH
|
||||
|
||||
#define SRC_INCOHERENT_FLUSH(domain, flush, invalidate) \
|
||||
#define SRC_INCOHERENT_FLUSH(domain, clean, invalidate) \
|
||||
if (src_mask & TU_ACCESS_##domain##_INCOHERENT_WRITE) { \
|
||||
flush_bits |= TU_CMD_FLAG_##flush; \
|
||||
flush_bits |= TU_CMD_FLAG_##clean; \
|
||||
cache->pending_flush_bits |= \
|
||||
(TU_CMD_FLAG_ALL_INVALIDATE & ~TU_CMD_FLAG_##invalidate); \
|
||||
}
|
||||
|
||||
SRC_INCOHERENT_FLUSH(CCU_COLOR, CCU_FLUSH_COLOR, CCU_INVALIDATE_COLOR)
|
||||
SRC_INCOHERENT_FLUSH(CCU_DEPTH, CCU_FLUSH_DEPTH, CCU_INVALIDATE_DEPTH)
|
||||
SRC_INCOHERENT_FLUSH(CCU_COLOR, CCU_CLEAN_COLOR, CCU_INVALIDATE_COLOR)
|
||||
SRC_INCOHERENT_FLUSH(CCU_DEPTH, CCU_CLEAN_DEPTH, CCU_INVALIDATE_DEPTH)
|
||||
|
||||
#undef SRC_INCOHERENT_FLUSH
|
||||
|
||||
|
|
@ -3548,20 +3548,20 @@ tu_flush_for_access(struct tu_cache_state *cache,
|
|||
* drains the queue before signalling completion to the host.
|
||||
*/
|
||||
if (dst_mask & (TU_ACCESS_SYSMEM_READ | TU_ACCESS_SYSMEM_WRITE)) {
|
||||
flush_bits |= cache->pending_flush_bits & TU_CMD_FLAG_ALL_FLUSH;
|
||||
flush_bits |= cache->pending_flush_bits & TU_CMD_FLAG_ALL_CLEAN;
|
||||
}
|
||||
|
||||
#define DST_FLUSH(domain, flush, invalidate) \
|
||||
#define DST_FLUSH(domain, clean, invalidate) \
|
||||
if (dst_mask & (TU_ACCESS_##domain##_READ | \
|
||||
TU_ACCESS_##domain##_WRITE)) { \
|
||||
flush_bits |= cache->pending_flush_bits & \
|
||||
(TU_CMD_FLAG_##invalidate | \
|
||||
(TU_CMD_FLAG_ALL_FLUSH & ~TU_CMD_FLAG_##flush)); \
|
||||
(TU_CMD_FLAG_ALL_CLEAN & ~TU_CMD_FLAG_##clean)); \
|
||||
}
|
||||
|
||||
DST_FLUSH(UCHE, CACHE_FLUSH, CACHE_INVALIDATE)
|
||||
DST_FLUSH(CCU_COLOR, CCU_FLUSH_COLOR, CCU_INVALIDATE_COLOR)
|
||||
DST_FLUSH(CCU_DEPTH, CCU_FLUSH_DEPTH, CCU_INVALIDATE_DEPTH)
|
||||
DST_FLUSH(UCHE, CACHE_CLEAN, CACHE_INVALIDATE)
|
||||
DST_FLUSH(CCU_COLOR, CCU_CLEAN_COLOR, CCU_INVALIDATE_COLOR)
|
||||
DST_FLUSH(CCU_DEPTH, CCU_CLEAN_DEPTH, CCU_INVALIDATE_DEPTH)
|
||||
|
||||
#undef DST_FLUSH
|
||||
|
||||
|
|
@ -3570,11 +3570,11 @@ tu_flush_for_access(struct tu_cache_state *cache,
|
|||
TU_ACCESS_##domain##_INCOHERENT_WRITE)) { \
|
||||
flush_bits |= TU_CMD_FLAG_##invalidate | \
|
||||
(cache->pending_flush_bits & \
|
||||
(TU_CMD_FLAG_ALL_FLUSH & ~TU_CMD_FLAG_##flush)); \
|
||||
(TU_CMD_FLAG_ALL_CLEAN & ~TU_CMD_FLAG_##flush)); \
|
||||
}
|
||||
|
||||
DST_INCOHERENT_FLUSH(CCU_COLOR, CCU_FLUSH_COLOR, CCU_INVALIDATE_COLOR)
|
||||
DST_INCOHERENT_FLUSH(CCU_DEPTH, CCU_FLUSH_DEPTH, CCU_INVALIDATE_DEPTH)
|
||||
DST_INCOHERENT_FLUSH(CCU_COLOR, CCU_CLEAN_COLOR, CCU_INVALIDATE_COLOR)
|
||||
DST_INCOHERENT_FLUSH(CCU_DEPTH, CCU_CLEAN_DEPTH, CCU_INVALIDATE_DEPTH)
|
||||
|
||||
if (dst_mask & TU_ACCESS_BINDLESS_DESCRIPTOR_READ) {
|
||||
flush_bits |= TU_CMD_FLAG_BINDLESS_DESCRIPTOR_INVALIDATE;
|
||||
|
|
@ -3589,7 +3589,7 @@ tu_flush_for_access(struct tu_cache_state *cache,
|
|||
|
||||
/* The blit cache is a special case dependency between CP_EVENT_WRITE::BLIT
|
||||
* (from GMEM loads/clears) to any GMEM attachment reads done via the UCHE
|
||||
* (Eg: Input attachments/CP_BLIT) which needs an explicit BLIT_CACHE_FLUSH
|
||||
* (Eg: Input attachments/CP_BLIT) which needs an explicit BLIT_CACHE_CLEAN
|
||||
* for the event blit writes to land, it has the following properties:
|
||||
* - Set on reads rather than on writes, like flushes.
|
||||
* - Not executed automatically if pending, like invalidates.
|
||||
|
|
@ -3597,12 +3597,12 @@ tu_flush_for_access(struct tu_cache_state *cache,
|
|||
* continuing the render pass.
|
||||
*/
|
||||
if (src_mask & TU_ACCESS_BLIT_WRITE_GMEM) {
|
||||
cache->pending_flush_bits |= TU_CMD_FLAG_BLIT_CACHE_FLUSH;
|
||||
cache->pending_flush_bits |= TU_CMD_FLAG_BLIT_CACHE_CLEAN;
|
||||
}
|
||||
|
||||
if ((dst_mask & TU_ACCESS_UCHE_READ_GMEM) &&
|
||||
(cache->pending_flush_bits & TU_CMD_FLAG_BLIT_CACHE_FLUSH)) {
|
||||
flush_bits |= TU_CMD_FLAG_BLIT_CACHE_FLUSH;
|
||||
(cache->pending_flush_bits & TU_CMD_FLAG_BLIT_CACHE_CLEAN)) {
|
||||
flush_bits |= TU_CMD_FLAG_BLIT_CACHE_CLEAN;
|
||||
}
|
||||
|
||||
#undef DST_INCOHERENT_FLUSH
|
||||
|
|
@ -4057,10 +4057,10 @@ tu_CmdExecuteCommands(VkCommandBuffer commandBuffer,
|
|||
|
||||
/* Emit any pending flushes. */
|
||||
if (cmd->state.pass) {
|
||||
tu_flush_all_pending(&cmd->state.renderpass_cache);
|
||||
tu_clean_all_pending(&cmd->state.renderpass_cache);
|
||||
TU_CALLX(cmd->device, tu_emit_cache_flush_renderpass)(cmd);
|
||||
} else {
|
||||
tu_flush_all_pending(&cmd->state.cache);
|
||||
tu_clean_all_pending(&cmd->state.cache);
|
||||
TU_CALLX(cmd->device, tu_emit_cache_flush)(cmd);
|
||||
}
|
||||
|
||||
|
|
@ -4221,7 +4221,7 @@ tu_CmdExecuteCommands(VkCommandBuffer commandBuffer,
|
|||
if (cmd->state.pass) {
|
||||
struct tu_cache_state *cache = &cmd->state.renderpass_cache;
|
||||
BITMASK_ENUM(tu_cmd_flush_bits) retained_pending_flush_bits =
|
||||
cache->pending_flush_bits & TU_CMD_FLAG_BLIT_CACHE_FLUSH;
|
||||
cache->pending_flush_bits & TU_CMD_FLAG_BLIT_CACHE_CLEAN;
|
||||
tu_cache_init(cache);
|
||||
cache->pending_flush_bits |= retained_pending_flush_bits;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -195,11 +195,11 @@ enum tu_stage {
|
|||
};
|
||||
|
||||
enum tu_cmd_flush_bits {
|
||||
TU_CMD_FLAG_CCU_FLUSH_DEPTH = 1 << 0,
|
||||
TU_CMD_FLAG_CCU_FLUSH_COLOR = 1 << 1,
|
||||
TU_CMD_FLAG_CCU_CLEAN_DEPTH = 1 << 0,
|
||||
TU_CMD_FLAG_CCU_CLEAN_COLOR = 1 << 1,
|
||||
TU_CMD_FLAG_CCU_INVALIDATE_DEPTH = 1 << 2,
|
||||
TU_CMD_FLAG_CCU_INVALIDATE_COLOR = 1 << 3,
|
||||
TU_CMD_FLAG_CACHE_FLUSH = 1 << 4,
|
||||
TU_CMD_FLAG_CACHE_CLEAN = 1 << 4,
|
||||
TU_CMD_FLAG_CACHE_INVALIDATE = 1 << 5,
|
||||
TU_CMD_FLAG_CCHE_INVALIDATE = 1 << 6,
|
||||
TU_CMD_FLAG_WAIT_MEM_WRITES = 1 << 7,
|
||||
|
|
@ -209,12 +209,12 @@ enum tu_cmd_flush_bits {
|
|||
/* This is an unusual flush that isn't automatically executed if pending,
|
||||
* as it isn't necessary. Therefore, it's not included in ALL_FLUSH.
|
||||
*/
|
||||
TU_CMD_FLAG_BLIT_CACHE_FLUSH = 1 << 11,
|
||||
TU_CMD_FLAG_BLIT_CACHE_CLEAN = 1 << 11,
|
||||
|
||||
TU_CMD_FLAG_ALL_FLUSH =
|
||||
TU_CMD_FLAG_CCU_FLUSH_DEPTH |
|
||||
TU_CMD_FLAG_CCU_FLUSH_COLOR |
|
||||
TU_CMD_FLAG_CACHE_FLUSH |
|
||||
TU_CMD_FLAG_ALL_CLEAN =
|
||||
TU_CMD_FLAG_CCU_CLEAN_DEPTH |
|
||||
TU_CMD_FLAG_CCU_CLEAN_COLOR |
|
||||
TU_CMD_FLAG_CACHE_CLEAN |
|
||||
/* Treat the CP as a sort of "cache" which may need to be "flushed" via
|
||||
* waiting for writes to land with WAIT_FOR_MEM_WRITES.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -1481,7 +1481,7 @@ emit_end_xfb_query(struct tu_cmd_buffer *cmdbuf,
|
|||
tu_emit_event_write<CHIP>(cmdbuf, cs, FD_WRITE_PRIMITIVE_COUNTS);
|
||||
|
||||
tu_cs_emit_wfi(cs);
|
||||
tu_emit_event_write<CHIP>(cmdbuf, cs, FD_CACHE_FLUSH);
|
||||
tu_emit_event_write<CHIP>(cmdbuf, cs, FD_CACHE_CLEAN);
|
||||
|
||||
/* Set the count of written primitives */
|
||||
tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 9);
|
||||
|
|
@ -1492,7 +1492,7 @@ emit_end_xfb_query(struct tu_cmd_buffer *cmdbuf,
|
|||
tu_cs_emit_qw(cs, end_written_iova);
|
||||
tu_cs_emit_qw(cs, begin_written_iova);
|
||||
|
||||
tu_emit_event_write<CHIP>(cmdbuf, cs, FD_CACHE_FLUSH);
|
||||
tu_emit_event_write<CHIP>(cmdbuf, cs, FD_CACHE_CLEAN);
|
||||
|
||||
/* Set the count of generated primitives */
|
||||
tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 9);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue