diff --git a/.pick_status.json b/.pick_status.json index 9f6c7c86773..5dc222c83f2 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -534,7 +534,7 @@ "description": "anv: fix transfer barriers flushes with compute queue", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 8c46767c122..0ffaac13be9 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -3750,7 +3750,7 @@ genX(CmdExecuteCommands)( } static inline enum anv_pipe_bits -anv_pipe_flush_bits_for_access_flags(struct anv_device *device, +anv_pipe_flush_bits_for_access_flags(struct anv_cmd_buffer *cmd_buffer, VkAccessFlags2 flags) { enum anv_pipe_bits pipe_bits = 0; @@ -3791,12 +3791,17 @@ anv_pipe_flush_bits_for_access_flags(struct anv_device *device, * - vkCmdCopy*(), vkCmdUpdate*(), vkCmdFill*() * * Most of these operations are implemented using Blorp which writes - * through the render target, so flush that cache to make it visible - * to future operations. And for depth related operations we also - * need to flush the depth cache. + * through the render target cache or the depth cache on the graphics + * queue. On the compute queue, the writes are done through the data + * port. */ - pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; - pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT; + if (anv_cmd_buffer_is_compute_queue(cmd_buffer)) { + pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT; + pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT; + } else { + pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; + pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT; + } break; case VK_ACCESS_2_MEMORY_WRITE_BIT: /* We're transitioning a buffer for generic write operations. Flush @@ -3833,9 +3838,10 @@ anv_pipe_flush_bits_for_access_flags(struct anv_device *device, } static inline enum anv_pipe_bits -anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device, +anv_pipe_invalidate_bits_for_access_flags(struct anv_cmd_buffer *cmd_buffer, VkAccessFlags2 flags) { + struct anv_device *device = cmd_buffer->device; enum anv_pipe_bits pipe_bits = 0; u_foreach_bit64(b, flags) { @@ -4338,8 +4344,8 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer, } enum anv_pipe_bits bits = - anv_pipe_flush_bits_for_access_flags(device, src_flags) | - anv_pipe_invalidate_bits_for_access_flags(device, dst_flags); + anv_pipe_flush_bits_for_access_flags(cmd_buffer, src_flags) | + anv_pipe_invalidate_bits_for_access_flags(cmd_buffer, dst_flags); /* Our HW implementation of the sparse feature lives in the GAM unit * (interface between all the GPU caches and external memory). As a result