diff --git a/.pick_status.json b/.pick_status.json index 155c15266c4..9f6c7c86773 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -3114,7 +3114,7 @@ "description": "anv: move *bits_for_access_flags to genX_cmd_buffer", "nominated": false, "nomination_type": 3, - "resolution": 4, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 47c306cd231..9183cbecc28 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -2863,226 +2863,6 @@ enum anv_query_bits { enum intel_ds_stall_flag anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits); -static inline enum anv_pipe_bits -anv_pipe_flush_bits_for_access_flags(struct anv_device *device, - VkAccessFlags2 flags) -{ - enum anv_pipe_bits pipe_bits = 0; - - u_foreach_bit64(b, flags) { - switch ((VkAccessFlags2)BITFIELD64_BIT(b)) { - case VK_ACCESS_2_SHADER_WRITE_BIT: - case VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT: - case VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR: - /* We're transitioning a buffer that was previously used as write - * destination through the data port. To make its content available - * to future operations, flush the hdc pipeline. - */ - pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT; - pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT; - break; - case VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT: - /* We're transitioning a buffer that was previously used as render - * target. To make its content available to future operations, flush - * the render target cache. - */ - pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; - break; - case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: - /* We're transitioning a buffer that was previously used as depth - * buffer. To make its content available to future operations, flush - * the depth cache. - */ - pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT; - break; - case VK_ACCESS_2_TRANSFER_WRITE_BIT: - /* We're transitioning a buffer that was previously used as a - * transfer write destination. Generic write operations include color - * & depth operations as well as buffer operations like : - * - vkCmdClearColorImage() - * - vkCmdClearDepthStencilImage() - * - vkCmdBlitImage() - * - vkCmdCopy*(), vkCmdUpdate*(), vkCmdFill*() - * - * Most of these operations are implemented using Blorp which writes - * through the render target, so flush that cache to make it visible - * to future operations. And for depth related operations we also - * need to flush the depth cache. - */ - pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; - pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT; - break; - case VK_ACCESS_2_MEMORY_WRITE_BIT: - /* We're transitioning a buffer for generic write operations. Flush - * all the caches. - */ - pipe_bits |= ANV_PIPE_FLUSH_BITS; - break; - case VK_ACCESS_2_HOST_WRITE_BIT: - /* We're transitioning a buffer for access by CPU. Invalidate - * all the caches. Since data and tile caches don't have invalidate, - * we are forced to flush those as well. - */ - pipe_bits |= ANV_PIPE_FLUSH_BITS; - pipe_bits |= ANV_PIPE_INVALIDATE_BITS; - break; - case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT: - case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT: - /* We're transitioning a buffer written either from VS stage or from - * the command streamer (see CmdEndTransformFeedbackEXT), we just - * need to stall the CS. - * - * Streamout writes apparently bypassing L3, in order to make them - * visible to the destination, we need to invalidate the other - * caches. - */ - pipe_bits |= ANV_PIPE_CS_STALL_BIT | ANV_PIPE_INVALIDATE_BITS; - break; - default: - break; /* Nothing to do */ - } - } - - return pipe_bits; -} - -static inline enum anv_pipe_bits -anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device, - VkAccessFlags2 flags) -{ - enum anv_pipe_bits pipe_bits = 0; - - u_foreach_bit64(b, flags) { - switch ((VkAccessFlags2)BITFIELD64_BIT(b)) { - case VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT: - /* Indirect draw commands take a buffer as input that we're going to - * read from the command streamer to load some of the HW registers - * (see genX_cmd_buffer.c:load_indirect_parameters). This requires a - * command streamer stall so that all the cache flushes have - * completed before the command streamer loads from memory. - */ - pipe_bits |= ANV_PIPE_CS_STALL_BIT; - /* Indirect draw commands also set gl_BaseVertex & gl_BaseIndex - * through a vertex buffer, so invalidate that cache. - */ - pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT; - /* For CmdDipatchIndirect, we also load gl_NumWorkGroups through a - * UBO from the buffer, so we need to invalidate constant cache. - */ - pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT; - pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT; - /* Tile cache flush needed For CmdDipatchIndirect since command - * streamer and vertex fetch aren't L3 coherent. - */ - pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT; - break; - case VK_ACCESS_2_INDEX_READ_BIT: - case VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT: - /* We transitioning a buffer to be used for as input for vkCmdDraw* - * commands, so we invalidate the VF cache to make sure there is no - * stale data when we start rendering. - */ - pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT; - break; - case VK_ACCESS_2_UNIFORM_READ_BIT: - case VK_ACCESS_2_SHADER_BINDING_TABLE_READ_BIT_KHR: - /* We transitioning a buffer to be used as uniform data. Because - * uniform is accessed through the data port & sampler, we need to - * invalidate the texture cache (sampler) & constant cache (data - * port) to avoid stale data. - */ - pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT; - if (device->physical->compiler->indirect_ubos_use_sampler) { - pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT; - } else { - pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT; - pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT; - } - break; - case VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT: - case VK_ACCESS_2_TRANSFER_READ_BIT: - case VK_ACCESS_2_SHADER_SAMPLED_READ_BIT: - /* Transitioning a buffer to be read through the sampler, so - * invalidate the texture cache, we don't want any stale data. - */ - pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT; - break; - case VK_ACCESS_2_SHADER_READ_BIT: - /* Same as VK_ACCESS_2_UNIFORM_READ_BIT and - * VK_ACCESS_2_SHADER_SAMPLED_READ_BIT cases above - */ - pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | - ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT; - if (!device->physical->compiler->indirect_ubos_use_sampler) { - pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT; - pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT; - } - break; - case VK_ACCESS_2_MEMORY_READ_BIT: - /* Transitioning a buffer for generic read, invalidate all the - * caches. - */ - pipe_bits |= ANV_PIPE_INVALIDATE_BITS; - break; - case VK_ACCESS_2_MEMORY_WRITE_BIT: - /* Generic write, make sure all previously written things land in - * memory. - */ - pipe_bits |= ANV_PIPE_FLUSH_BITS; - break; - case VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT: - case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT: - /* Transitioning a buffer for conditional rendering or transform - * feedback. We'll load the content of this buffer into HW registers - * using the command streamer, so we need to stall the command - * streamer , so we need to stall the command streamer to make sure - * any in-flight flush operations have completed. - */ - pipe_bits |= ANV_PIPE_CS_STALL_BIT; - pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT; - pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT; - break; - case VK_ACCESS_2_HOST_READ_BIT: - /* We're transitioning a buffer that was written by CPU. Flush - * all the caches. - */ - pipe_bits |= ANV_PIPE_FLUSH_BITS; - break; - case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT: - /* We're transitioning a buffer to be written by the streamout fixed - * function. This one is apparently not L3 coherent, so we need a - * tile cache flush to make sure any previous write is not going to - * create WaW hazards. - */ - pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT; - break; - case VK_ACCESS_2_SHADER_STORAGE_READ_BIT: - /* VK_ACCESS_2_SHADER_STORAGE_READ_BIT specifies read access to a - * storage buffer, physical storage buffer, storage texel buffer, or - * storage image in any shader pipeline stage. - * - * Any storage buffers or images written to must be invalidated and - * flushed before the shader can access them. - * - * Both HDC & Untyped flushes also do invalidation. This is why we use - * this here on Gfx12+. - * - * Gfx11 and prior don't have HDC. Only Data cache flush is available - * and it only operates on the written cache lines. - */ - if (device->info->ver >= 12) { - pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT; - pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT; - } - break; - default: - break; /* Nothing to do */ - } - } - - return pipe_bits; -} - #define VK_IMAGE_ASPECT_PLANES_BITS_ANV ( \ VK_IMAGE_ASPECT_PLANE_0_BIT | \ VK_IMAGE_ASPECT_PLANE_1_BIT | \ diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index f6ab2f4ed06..8c46767c122 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -3749,6 +3749,226 @@ genX(CmdExecuteCommands)( } } +static inline enum anv_pipe_bits +anv_pipe_flush_bits_for_access_flags(struct anv_device *device, + VkAccessFlags2 flags) +{ + enum anv_pipe_bits pipe_bits = 0; + + u_foreach_bit64(b, flags) { + switch ((VkAccessFlags2)BITFIELD64_BIT(b)) { + case VK_ACCESS_2_SHADER_WRITE_BIT: + case VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT: + case VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR: + /* We're transitioning a buffer that was previously used as write + * destination through the data port. To make its content available + * to future operations, flush the hdc pipeline. + */ + pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT; + pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT; + break; + case VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT: + /* We're transitioning a buffer that was previously used as render + * target. To make its content available to future operations, flush + * the render target cache. + */ + pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; + break; + case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: + /* We're transitioning a buffer that was previously used as depth + * buffer. To make its content available to future operations, flush + * the depth cache. + */ + pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT; + break; + case VK_ACCESS_2_TRANSFER_WRITE_BIT: + /* We're transitioning a buffer that was previously used as a + * transfer write destination. Generic write operations include color + * & depth operations as well as buffer operations like : + * - vkCmdClearColorImage() + * - vkCmdClearDepthStencilImage() + * - vkCmdBlitImage() + * - vkCmdCopy*(), vkCmdUpdate*(), vkCmdFill*() + * + * Most of these operations are implemented using Blorp which writes + * through the render target, so flush that cache to make it visible + * to future operations. And for depth related operations we also + * need to flush the depth cache. + */ + pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; + pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT; + break; + case VK_ACCESS_2_MEMORY_WRITE_BIT: + /* We're transitioning a buffer for generic write operations. Flush + * all the caches. + */ + pipe_bits |= ANV_PIPE_FLUSH_BITS; + break; + case VK_ACCESS_2_HOST_WRITE_BIT: + /* We're transitioning a buffer for access by CPU. Invalidate + * all the caches. Since data and tile caches don't have invalidate, + * we are forced to flush those as well. + */ + pipe_bits |= ANV_PIPE_FLUSH_BITS; + pipe_bits |= ANV_PIPE_INVALIDATE_BITS; + break; + case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT: + case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT: + /* We're transitioning a buffer written either from VS stage or from + * the command streamer (see CmdEndTransformFeedbackEXT), we just + * need to stall the CS. + * + * Streamout writes apparently bypassing L3, in order to make them + * visible to the destination, we need to invalidate the other + * caches. + */ + pipe_bits |= ANV_PIPE_CS_STALL_BIT | ANV_PIPE_INVALIDATE_BITS; + break; + default: + break; /* Nothing to do */ + } + } + + return pipe_bits; +} + +static inline enum anv_pipe_bits +anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device, + VkAccessFlags2 flags) +{ + enum anv_pipe_bits pipe_bits = 0; + + u_foreach_bit64(b, flags) { + switch ((VkAccessFlags2)BITFIELD64_BIT(b)) { + case VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT: + /* Indirect draw commands take a buffer as input that we're going to + * read from the command streamer to load some of the HW registers + * (see genX_cmd_buffer.c:load_indirect_parameters). This requires a + * command streamer stall so that all the cache flushes have + * completed before the command streamer loads from memory. + */ + pipe_bits |= ANV_PIPE_CS_STALL_BIT; + /* Indirect draw commands also set gl_BaseVertex & gl_BaseIndex + * through a vertex buffer, so invalidate that cache. + */ + pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT; + /* For CmdDipatchIndirect, we also load gl_NumWorkGroups through a + * UBO from the buffer, so we need to invalidate constant cache. + */ + pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT; + pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT; + /* Tile cache flush needed For CmdDipatchIndirect since command + * streamer and vertex fetch aren't L3 coherent. + */ + pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT; + break; + case VK_ACCESS_2_INDEX_READ_BIT: + case VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT: + /* We transitioning a buffer to be used for as input for vkCmdDraw* + * commands, so we invalidate the VF cache to make sure there is no + * stale data when we start rendering. + */ + pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT; + break; + case VK_ACCESS_2_UNIFORM_READ_BIT: + case VK_ACCESS_2_SHADER_BINDING_TABLE_READ_BIT_KHR: + /* We transitioning a buffer to be used as uniform data. Because + * uniform is accessed through the data port & sampler, we need to + * invalidate the texture cache (sampler) & constant cache (data + * port) to avoid stale data. + */ + pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT; + if (device->physical->compiler->indirect_ubos_use_sampler) { + pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT; + } else { + pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT; + pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT; + } + break; + case VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT: + case VK_ACCESS_2_TRANSFER_READ_BIT: + case VK_ACCESS_2_SHADER_SAMPLED_READ_BIT: + /* Transitioning a buffer to be read through the sampler, so + * invalidate the texture cache, we don't want any stale data. + */ + pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT; + break; + case VK_ACCESS_2_SHADER_READ_BIT: + /* Same as VK_ACCESS_2_UNIFORM_READ_BIT and + * VK_ACCESS_2_SHADER_SAMPLED_READ_BIT cases above + */ + pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | + ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT; + if (!device->physical->compiler->indirect_ubos_use_sampler) { + pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT; + pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT; + } + break; + case VK_ACCESS_2_MEMORY_READ_BIT: + /* Transitioning a buffer for generic read, invalidate all the + * caches. + */ + pipe_bits |= ANV_PIPE_INVALIDATE_BITS; + break; + case VK_ACCESS_2_MEMORY_WRITE_BIT: + /* Generic write, make sure all previously written things land in + * memory. + */ + pipe_bits |= ANV_PIPE_FLUSH_BITS; + break; + case VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT: + case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT: + /* Transitioning a buffer for conditional rendering or transform + * feedback. We'll load the content of this buffer into HW registers + * using the command streamer, so we need to stall the command + * streamer , so we need to stall the command streamer to make sure + * any in-flight flush operations have completed. + */ + pipe_bits |= ANV_PIPE_CS_STALL_BIT; + pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT; + pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT; + break; + case VK_ACCESS_2_HOST_READ_BIT: + /* We're transitioning a buffer that was written by CPU. Flush + * all the caches. + */ + pipe_bits |= ANV_PIPE_FLUSH_BITS; + break; + case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT: + /* We're transitioning a buffer to be written by the streamout fixed + * function. This one is apparently not L3 coherent, so we need a + * tile cache flush to make sure any previous write is not going to + * create WaW hazards. + */ + pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT; + break; + case VK_ACCESS_2_SHADER_STORAGE_READ_BIT: + /* VK_ACCESS_2_SHADER_STORAGE_READ_BIT specifies read access to a + * storage buffer, physical storage buffer, storage texel buffer, or + * storage image in any shader pipeline stage. + * + * Any storage buffers or images written to must be invalidated and + * flushed before the shader can access them. + * + * Both HDC & Untyped flushes also do invalidation. This is why we use + * this here on Gfx12+. + * + * Gfx11 and prior don't have HDC. Only Data cache flush is available + * and it only operates on the written cache lines. + */ + if (device->info->ver >= 12) { + pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT; + pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT; + } + break; + default: + break; /* Nothing to do */ + } + } + + return pipe_bits; +} + static inline bool stage_is_shader(const VkPipelineStageFlags2 stage) {