From 8834ef8bcd5ecc385323613bb0bc973cc0a1df30 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 22 Oct 2024 21:37:00 +0300 Subject: [PATCH] anv: use flushing PIPE_CONTROL for event signaling Signed-off-by: Lionel Landwerlin Reviewed-by: Caio Oliveira Part-of: --- src/intel/vulkan/anv_private.h | 6 ++++ src/intel/vulkan/genX_cmd_buffer.c | 51 +++++++++++++++++++----------- 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 86185d5dd68..d222d35f25d 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -4020,6 +4020,12 @@ enum anv_pipe_bits { */ ANV_PIPE_POST_SYNC_BIT = (1 << 24), + /* This bit does not exist directly in PIPE_CONTROL. It indicates that the + * end-of-pipe write needs to be flushed out of L3. On Xe2+ this means that + * we cannot use RESOURCE_BARRIER to write that value since it'll stay in + * L3. + */ + ANV_PIPE_END_OF_PIPE_SYNC_FORCE_FLUSH_L3_BIT = (1 << 25), }; /* These bits track the state of buffer writes for queries. They get cleared diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 65d60ada53e..0936a6ee8e4 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -1645,6 +1645,8 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch, VkPipelineStageFlags2 src_stages, VkPipelineStageFlags2 dst_stages, enum anv_pipe_bits bits, + struct anv_address signal_addr, + struct anv_address wait_addr, enum anv_pipe_bits *emitted_flush_bits) { /* What stage require a stall at pixel scoreboard */ @@ -1857,7 +1859,7 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch, ANV_PIPE_END_OF_PIPE_SYNC_BIT); uint32_t sync_op = NoWrite; - struct anv_address addr = ANV_NULL_ADDRESS; + struct anv_address addr = signal_addr; /* From Sandybridge PRM, volume 2, "1.7.3.1 Writing a Value to Memory": * @@ -1887,12 +1889,15 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch, if (flush_bits & ANV_PIPE_END_OF_PIPE_SYNC_BIT) { flush_bits |= ANV_PIPE_CS_STALL_BIT; sync_op = WriteImmediateData; - addr = device->workaround_address; + if (anv_address_is_null(signal_addr)) + addr = device->workaround_address; } /* Flush PC. */ emit_pipe_control(batch, device->info, current_pipeline, - sync_op, addr, 0, flush_bits); + sync_op, addr, + anv_address_is_null(addr) ? 0 : 1, + flush_bits); /* If the caller wants to know what flushes have been emitted, * provide the bits based off the PIPE_CONTROL programmed bits. @@ -1901,7 +1906,8 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch, *emitted_flush_bits = flush_bits; bits &= ~(ANV_PIPE_FLUSH_BITS | ANV_PIPE_STALL_BITS | - ANV_PIPE_END_OF_PIPE_SYNC_BIT); + ANV_PIPE_END_OF_PIPE_SYNC_BIT | + ANV_PIPE_END_OF_PIPE_SYNC_FORCE_FLUSH_L3_BIT); } if (bits & ANV_PIPE_INVALIDATE_BITS) { @@ -2009,6 +2015,7 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->device, cmd_buffer->state.current_pipeline, src_stages, dst_stages, bits, + ANV_NULL_ADDRESS, ANV_NULL_ADDRESS, &emitted_bits); anv_cmd_buffer_update_pending_query_bits(cmd_buffer, emitted_bits); @@ -2703,6 +2710,7 @@ emit_pipe_control(struct anv_batch *batch, pipe.InstructionCacheInvalidateEnable = bits & ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT; + assert(!anv_address_is_null(address) || post_sync_op == NoWrite); pipe.PostSyncOperation = post_sync_op; pipe.Address = address; pipe.DestinationAddressType = DAT_PPGTT; @@ -6328,24 +6336,31 @@ void genX(CmdSetEvent2)( case INTEL_ENGINE_CLASS_RENDER: case INTEL_ENGINE_CLASS_COMPUTE: { - VkPipelineStageFlags2 src_stages = - vk_collect_dependency_info_src_stages(pDependencyInfo); + VkPipelineStageFlags2 src_stages, dst_stages; + enum anv_pipe_bits bits = 0; + cmd_buffer_accumulate_barrier_bits(cmd_buffer, 1, pDependencyInfo, + &src_stages, &dst_stages, &bits); - cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT; - genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); + /* Only consider the flush bits, the wait part will do the invalidate. + */ + bits &= ANV_PIPE_FLUSH_BITS; - enum anv_pipe_bits pc_bits = 0; - if (src_stages & ANV_PIPELINE_STAGE_PIPELINED_BITS) { - pc_bits |= ANV_PIPE_STALL_AT_SCOREBOARD_BIT; - pc_bits |= ANV_PIPE_CS_STALL_BIT; - } + /* To have the signal_addr written */ + bits |= ANV_PIPE_END_OF_PIPE_SYNC_BIT; - genX(batch_emit_pipe_control_write) - (&cmd_buffer->batch, cmd_buffer->device->info, - cmd_buffer->state.current_pipeline, WriteImmediateData, - anv_state_pool_state_address(&cmd_buffer->device->dynamic_state_pool, + /* Need main memory coherency */ + if ((event->flags & VK_EVENT_CREATE_DEVICE_ONLY_BIT) == 0) + bits |= ANV_PIPE_END_OF_PIPE_SYNC_FORCE_FLUSH_L3_BIT; + + genX(emit_apply_pipe_flushes)(&cmd_buffer->batch, + cmd_buffer->device, + cmd_buffer->state.current_pipeline, + src_stages, dst_stages, bits, + anv_state_pool_state_address( + &cmd_buffer->device->dynamic_state_pool, event->state), - 1, pc_bits, "vkCmdSetEvent2"); + ANV_NULL_ADDRESS, + NULL); break; }