From 6531617cad21535f4e746cc36d2765843b4c0e96 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Wed, 16 Oct 2024 13:47:41 +0300 Subject: [PATCH] anv: avoid L3 fabric flush in pipeline barriers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This bit is not needed for barriers and appears to trigger a performance regression. So leave it just for AUX-TT flushing/invalidation. Signed-off-by: Lionel Landwerlin Fixes: e3814dee1a ("anv: add plumbing/support for L3 fabric flush") Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12090 Reviewed-by: Nanley Chery Reviewed-by: Tapani Pälli Part-of: (cherry picked from commit cb224370b6fcda7f73be99d94c98096569b2e2ba) --- .pick_status.json | 2 +- src/intel/vulkan/anv_private.h | 13 +++++++++++-- src/intel/vulkan/genX_cmd_buffer.c | 16 ++++++++-------- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 041409fcb94..5b427976263 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -444,7 +444,7 @@ "description": "anv: avoid L3 fabric flush in pipeline barriers", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "e3814dee1ac0f90771b921a4f6f5aed10f06e8d4", "notes": null diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 3fecd622294..51f47f7fab7 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -3315,6 +3315,9 @@ enum anv_pipe_bits { ANV_PIPE_TLB_INVALIDATE_BIT = (1 << 18), + /* L3 Fabric Flush */ + ANV_PIPE_L3_FABRIC_FLUSH_BIT = (1 << 19), + ANV_PIPE_CS_STALL_BIT = (1 << 20), ANV_PIPE_END_OF_PIPE_SYNC_BIT = (1 << 21), @@ -3337,8 +3340,6 @@ enum anv_pipe_bits { */ ANV_PIPE_POST_SYNC_BIT = (1 << 24), - /* L3 Fabric Flush */ - ANV_PIPE_L3_FABRIC_FLUSH_BIT = (1 << 25), }; /* These bits track the state of buffer writes for queries. 
They get cleared @@ -3404,6 +3405,14 @@ enum anv_query_bits { ANV_PIPE_TILE_CACHE_FLUSH_BIT | \ ANV_PIPE_L3_FABRIC_FLUSH_BIT) +#define ANV_PIPE_BARRIER_FLUSH_BITS ( \ + ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \ + ANV_PIPE_DATA_CACHE_FLUSH_BIT | \ + ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \ + ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT | \ + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \ + ANV_PIPE_TILE_CACHE_FLUSH_BIT) + #define ANV_PIPE_STALL_BITS ( \ ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \ ANV_PIPE_DEPTH_STALL_BIT | \ diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 7a9117bd598..d1efef430c8 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -3495,14 +3495,14 @@ anv_pipe_flush_bits_for_access_flags(struct anv_cmd_buffer *cmd_buffer, /* We're transitioning a buffer for generic write operations. Flush * all the caches. */ - pipe_bits |= ANV_PIPE_FLUSH_BITS; + pipe_bits |= ANV_PIPE_BARRIER_FLUSH_BITS; break; case VK_ACCESS_2_HOST_WRITE_BIT: /* We're transitioning a buffer for access by CPU. Invalidate * all the caches. Since data and tile caches don't have invalidate, * we are forced to flush those as well. */ - pipe_bits |= ANV_PIPE_FLUSH_BITS; + pipe_bits |= ANV_PIPE_BARRIER_FLUSH_BITS; pipe_bits |= ANV_PIPE_INVALIDATE_BITS; break; case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT: @@ -3610,7 +3610,7 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_cmd_buffer *cmd_buffer, /* Generic write, make sure all previously written things land in * memory. */ - pipe_bits |= ANV_PIPE_FLUSH_BITS; + pipe_bits |= ANV_PIPE_BARRIER_FLUSH_BITS; break; case VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT: case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT: @@ -3628,7 +3628,7 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_cmd_buffer *cmd_buffer, /* We're transitioning a buffer that was written by CPU. Flush * all the caches. 
*/ - pipe_bits |= ANV_PIPE_FLUSH_BITS; + pipe_bits |= ANV_PIPE_BARRIER_FLUSH_BITS; break; case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT: /* We're transitioning a buffer to be written by the streamout fixed @@ -4224,7 +4224,7 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer, * to flush anymore. */ if (apply_sparse_flushes) - bits |= ANV_PIPE_FLUSH_BITS; + bits |= ANV_PIPE_BARRIER_FLUSH_BITS; #endif /* Copies from query pools are executed with a shader writing through the @@ -5861,7 +5861,7 @@ VkResult genX(CmdSetPerformanceOverrideINTEL)( if (pOverrideInfo->enable) { /* FLUSH ALL THE THINGS! As requested by the MDAPI team. */ anv_add_pending_pipe_bits(cmd_buffer, - ANV_PIPE_FLUSH_BITS | + ANV_PIPE_BARRIER_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS, "perf counter isolation"); genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); @@ -6104,7 +6104,7 @@ genX(cmd_buffer_begin_companion_rcs_syncpoint)( */ if (anv_cmd_buffer_is_compute_queue(cmd_buffer)) { - anv_add_pending_pipe_bits(cmd_buffer, ANV_PIPE_FLUSH_BITS | + anv_add_pending_pipe_bits(cmd_buffer, ANV_PIPE_BARRIER_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS | ANV_PIPE_STALL_BITS, "post main cmd buffer invalidate"); @@ -6176,7 +6176,7 @@ genX(cmd_buffer_end_companion_rcs_syncpoint)(struct anv_cmd_buffer *cmd_buffer, * - unblock the CCS */ anv_add_pending_pipe_bits(cmd_buffer->companion_rcs_cmd_buffer, - ANV_PIPE_FLUSH_BITS | + ANV_PIPE_BARRIER_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS | ANV_PIPE_STALL_BITS, "post rcs flush");