anv: avoid L3 fabric flush in pipeline barriers

This bit is not needed for barriers and appears to trigger a
performance regression. So leave it just for AUX-TT
flushing/invalidation.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fixes: e3814dee1a ("anv: add plumbing/support for L3 fabric flush")
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12090
Reviewed-by: Nanley Chery <nanley.g.chery@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31915>
This commit is contained in:
Lionel Landwerlin 2024-10-16 13:47:41 +03:00 committed by Marge Bot
parent 98ff271c5a
commit cb224370b6
2 changed files with 19 additions and 10 deletions

View file

@ -3386,6 +3386,9 @@ enum anv_pipe_bits {
ANV_PIPE_TLB_INVALIDATE_BIT = (1 << 18),
/* L3 Fabric Flush */
ANV_PIPE_L3_FABRIC_FLUSH_BIT = (1 << 19),
ANV_PIPE_CS_STALL_BIT = (1 << 20),
ANV_PIPE_END_OF_PIPE_SYNC_BIT = (1 << 21),
@ -3408,8 +3411,6 @@ enum anv_pipe_bits {
*/
ANV_PIPE_POST_SYNC_BIT = (1 << 24),
/* L3 Fabric Flush */
ANV_PIPE_L3_FABRIC_FLUSH_BIT = (1 << 25),
};
/* These bits track the state of buffer writes for queries. They get cleared
@ -3475,6 +3476,14 @@ enum anv_query_bits {
ANV_PIPE_TILE_CACHE_FLUSH_BIT | \
ANV_PIPE_L3_FABRIC_FLUSH_BIT)
/* Cache flush bits applied for pipeline barriers.
 *
 * Unlike the preceding flush mask, this one deliberately omits
 * ANV_PIPE_L3_FABRIC_FLUSH_BIT: per the commit description, the L3 fabric
 * flush is not needed for barriers and appears to trigger a performance
 * regression, so it is left for AUX-TT flushing/invalidation only.
 */
#define ANV_PIPE_BARRIER_FLUSH_BITS ( \
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \
ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT | \
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \
ANV_PIPE_TILE_CACHE_FLUSH_BIT)
#define ANV_PIPE_STALL_BITS ( \
ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \
ANV_PIPE_DEPTH_STALL_BIT | \

View file

@ -3752,14 +3752,14 @@ anv_pipe_flush_bits_for_access_flags(struct anv_cmd_buffer *cmd_buffer,
/* We're transitioning a buffer for generic write operations. Flush
* all the caches.
*/
pipe_bits |= ANV_PIPE_FLUSH_BITS;
pipe_bits |= ANV_PIPE_BARRIER_FLUSH_BITS;
break;
case VK_ACCESS_2_HOST_WRITE_BIT:
/* We're transitioning a buffer for access by CPU. Invalidate
* all the caches. Since data and tile caches don't have invalidate,
* we are forced to flush those as well.
*/
pipe_bits |= ANV_PIPE_FLUSH_BITS;
pipe_bits |= ANV_PIPE_BARRIER_FLUSH_BITS;
pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
break;
case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
@ -3867,7 +3867,7 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_cmd_buffer *cmd_buffer,
/* Generic write, make sure all previously written things land in
* memory.
*/
pipe_bits |= ANV_PIPE_FLUSH_BITS;
pipe_bits |= ANV_PIPE_BARRIER_FLUSH_BITS;
break;
case VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT:
case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT:
@ -3885,7 +3885,7 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_cmd_buffer *cmd_buffer,
/* We're transitioning a buffer that was written by CPU. Flush
* all the caches.
*/
pipe_bits |= ANV_PIPE_FLUSH_BITS;
pipe_bits |= ANV_PIPE_BARRIER_FLUSH_BITS;
break;
case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
/* We're transitioning a buffer to be written by the streamout fixed
@ -4481,7 +4481,7 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
* to flush anymore.
*/
if (apply_sparse_flushes)
bits |= ANV_PIPE_FLUSH_BITS;
bits |= ANV_PIPE_BARRIER_FLUSH_BITS;
#endif
/* Copies from query pools are executed with a shader writing through the
@ -6121,7 +6121,7 @@ VkResult genX(CmdSetPerformanceOverrideINTEL)(
if (pOverrideInfo->enable) {
/* FLUSH ALL THE THINGS! As requested by the MDAPI team. */
anv_add_pending_pipe_bits(cmd_buffer,
ANV_PIPE_FLUSH_BITS |
ANV_PIPE_BARRIER_FLUSH_BITS |
ANV_PIPE_INVALIDATE_BITS,
"perf counter isolation");
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
@ -6377,7 +6377,7 @@ genX(cmd_buffer_begin_companion_rcs_syncpoint)(
*/
if (anv_cmd_buffer_is_compute_queue(cmd_buffer)) {
anv_add_pending_pipe_bits(cmd_buffer, ANV_PIPE_FLUSH_BITS |
anv_add_pending_pipe_bits(cmd_buffer, ANV_PIPE_BARRIER_FLUSH_BITS |
ANV_PIPE_INVALIDATE_BITS |
ANV_PIPE_STALL_BITS,
"post main cmd buffer invalidate");
@ -6449,7 +6449,7 @@ genX(cmd_buffer_end_companion_rcs_syncpoint)(struct anv_cmd_buffer *cmd_buffer,
* - unblock the CCS
*/
anv_add_pending_pipe_bits(cmd_buffer->companion_rcs_cmd_buffer,
ANV_PIPE_FLUSH_BITS |
ANV_PIPE_BARRIER_FLUSH_BITS |
ANV_PIPE_INVALIDATE_BITS |
ANV_PIPE_STALL_BITS,
"post rcs flush");