anv: move *bits_for_access_flags to genX_cmd_buffer

This makes is possible to use GFX_VER macros in these functions.

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
(cherry picked from commit d0a3bac163)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27353>
This commit is contained in:
Tapani Pälli 2024-01-11 17:42:57 +02:00 committed by Eric Engestrom
parent 5cc5665915
commit 733dd5db5d
3 changed files with 221 additions and 221 deletions

View file

@ -3114,7 +3114,7 @@
"description": "anv: move *bits_for_access_flags to genX_cmd_buffer",
"nominated": false,
"nomination_type": 3,
"resolution": 4,
"resolution": 1,
"main_sha": null,
"because_sha": null,
"notes": null

View file

@ -2863,226 +2863,6 @@ enum anv_query_bits {
enum intel_ds_stall_flag
anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits);
static inline enum anv_pipe_bits
anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
VkAccessFlags2 flags)
{
enum anv_pipe_bits pipe_bits = 0;
u_foreach_bit64(b, flags) {
switch ((VkAccessFlags2)BITFIELD64_BIT(b)) {
case VK_ACCESS_2_SHADER_WRITE_BIT:
case VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT:
case VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR:
/* We're transitioning a buffer that was previously used as write
* destination through the data port. To make its content available
* to future operations, flush the hdc pipeline.
*/
pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT:
/* We're transitioning a buffer that was previously used as render
* target. To make its content available to future operations, flush
* the render target cache.
*/
pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
/* We're transitioning a buffer that was previously used as depth
* buffer. To make its content available to future operations, flush
* the depth cache.
*/
pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_2_TRANSFER_WRITE_BIT:
/* We're transitioning a buffer that was previously used as a
* transfer write destination. Generic write operations include color
* & depth operations as well as buffer operations like :
* - vkCmdClearColorImage()
* - vkCmdClearDepthStencilImage()
* - vkCmdBlitImage()
* - vkCmdCopy*(), vkCmdUpdate*(), vkCmdFill*()
*
* Most of these operations are implemented using Blorp which writes
* through the render target, so flush that cache to make it visible
* to future operations. And for depth related operations we also
* need to flush the depth cache.
*/
pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_2_MEMORY_WRITE_BIT:
/* We're transitioning a buffer for generic write operations. Flush
* all the caches.
*/
pipe_bits |= ANV_PIPE_FLUSH_BITS;
break;
case VK_ACCESS_2_HOST_WRITE_BIT:
/* We're transitioning a buffer for access by CPU. Invalidate
* all the caches. Since data and tile caches don't have invalidate,
* we are forced to flush those as well.
*/
pipe_bits |= ANV_PIPE_FLUSH_BITS;
pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
break;
case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
/* We're transitioning a buffer written either from VS stage or from
* the command streamer (see CmdEndTransformFeedbackEXT), we just
* need to stall the CS.
*
* Streamout writes apparently bypassing L3, in order to make them
* visible to the destination, we need to invalidate the other
* caches.
*/
pipe_bits |= ANV_PIPE_CS_STALL_BIT | ANV_PIPE_INVALIDATE_BITS;
break;
default:
break; /* Nothing to do */
}
}
return pipe_bits;
}
static inline enum anv_pipe_bits
anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
VkAccessFlags2 flags)
{
enum anv_pipe_bits pipe_bits = 0;
u_foreach_bit64(b, flags) {
switch ((VkAccessFlags2)BITFIELD64_BIT(b)) {
case VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT:
/* Indirect draw commands take a buffer as input that we're going to
* read from the command streamer to load some of the HW registers
* (see genX_cmd_buffer.c:load_indirect_parameters). This requires a
* command streamer stall so that all the cache flushes have
* completed before the command streamer loads from memory.
*/
pipe_bits |= ANV_PIPE_CS_STALL_BIT;
/* Indirect draw commands also set gl_BaseVertex & gl_BaseIndex
* through a vertex buffer, so invalidate that cache.
*/
pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
/* For CmdDipatchIndirect, we also load gl_NumWorkGroups through a
* UBO from the buffer, so we need to invalidate constant cache.
*/
pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
/* Tile cache flush needed For CmdDipatchIndirect since command
* streamer and vertex fetch aren't L3 coherent.
*/
pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_2_INDEX_READ_BIT:
case VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT:
/* We transitioning a buffer to be used for as input for vkCmdDraw*
* commands, so we invalidate the VF cache to make sure there is no
* stale data when we start rendering.
*/
pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
break;
case VK_ACCESS_2_UNIFORM_READ_BIT:
case VK_ACCESS_2_SHADER_BINDING_TABLE_READ_BIT_KHR:
/* We transitioning a buffer to be used as uniform data. Because
* uniform is accessed through the data port & sampler, we need to
* invalidate the texture cache (sampler) & constant cache (data
* port) to avoid stale data.
*/
pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
if (device->physical->compiler->indirect_ubos_use_sampler) {
pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
} else {
pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
}
break;
case VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT:
case VK_ACCESS_2_TRANSFER_READ_BIT:
case VK_ACCESS_2_SHADER_SAMPLED_READ_BIT:
/* Transitioning a buffer to be read through the sampler, so
* invalidate the texture cache, we don't want any stale data.
*/
pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
break;
case VK_ACCESS_2_SHADER_READ_BIT:
/* Same as VK_ACCESS_2_UNIFORM_READ_BIT and
* VK_ACCESS_2_SHADER_SAMPLED_READ_BIT cases above
*/
pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT |
ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
if (!device->physical->compiler->indirect_ubos_use_sampler) {
pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
}
break;
case VK_ACCESS_2_MEMORY_READ_BIT:
/* Transitioning a buffer for generic read, invalidate all the
* caches.
*/
pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
break;
case VK_ACCESS_2_MEMORY_WRITE_BIT:
/* Generic write, make sure all previously written things land in
* memory.
*/
pipe_bits |= ANV_PIPE_FLUSH_BITS;
break;
case VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT:
case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT:
/* Transitioning a buffer for conditional rendering or transform
* feedback. We'll load the content of this buffer into HW registers
* using the command streamer, so we need to stall the command
* streamer , so we need to stall the command streamer to make sure
* any in-flight flush operations have completed.
*/
pipe_bits |= ANV_PIPE_CS_STALL_BIT;
pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_2_HOST_READ_BIT:
/* We're transitioning a buffer that was written by CPU. Flush
* all the caches.
*/
pipe_bits |= ANV_PIPE_FLUSH_BITS;
break;
case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
/* We're transitioning a buffer to be written by the streamout fixed
* function. This one is apparently not L3 coherent, so we need a
* tile cache flush to make sure any previous write is not going to
* create WaW hazards.
*/
pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_2_SHADER_STORAGE_READ_BIT:
/* VK_ACCESS_2_SHADER_STORAGE_READ_BIT specifies read access to a
* storage buffer, physical storage buffer, storage texel buffer, or
* storage image in any shader pipeline stage.
*
* Any storage buffers or images written to must be invalidated and
* flushed before the shader can access them.
*
* Both HDC & Untyped flushes also do invalidation. This is why we use
* this here on Gfx12+.
*
* Gfx11 and prior don't have HDC. Only Data cache flush is available
* and it only operates on the written cache lines.
*/
if (device->info->ver >= 12) {
pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
}
break;
default:
break; /* Nothing to do */
}
}
return pipe_bits;
}
#define VK_IMAGE_ASPECT_PLANES_BITS_ANV ( \
VK_IMAGE_ASPECT_PLANE_0_BIT | \
VK_IMAGE_ASPECT_PLANE_1_BIT | \

View file

@ -3749,6 +3749,226 @@ genX(CmdExecuteCommands)(
}
}
static inline enum anv_pipe_bits
anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
VkAccessFlags2 flags)
{
enum anv_pipe_bits pipe_bits = 0;
u_foreach_bit64(b, flags) {
switch ((VkAccessFlags2)BITFIELD64_BIT(b)) {
case VK_ACCESS_2_SHADER_WRITE_BIT:
case VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT:
case VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR:
/* We're transitioning a buffer that was previously used as write
* destination through the data port. To make its content available
* to future operations, flush the hdc pipeline.
*/
pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT:
/* We're transitioning a buffer that was previously used as render
* target. To make its content available to future operations, flush
* the render target cache.
*/
pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
/* We're transitioning a buffer that was previously used as depth
* buffer. To make its content available to future operations, flush
* the depth cache.
*/
pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_2_TRANSFER_WRITE_BIT:
/* We're transitioning a buffer that was previously used as a
* transfer write destination. Generic write operations include color
* & depth operations as well as buffer operations like :
* - vkCmdClearColorImage()
* - vkCmdClearDepthStencilImage()
* - vkCmdBlitImage()
* - vkCmdCopy*(), vkCmdUpdate*(), vkCmdFill*()
*
* Most of these operations are implemented using Blorp which writes
* through the render target, so flush that cache to make it visible
* to future operations. And for depth related operations we also
* need to flush the depth cache.
*/
pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_2_MEMORY_WRITE_BIT:
/* We're transitioning a buffer for generic write operations. Flush
* all the caches.
*/
pipe_bits |= ANV_PIPE_FLUSH_BITS;
break;
case VK_ACCESS_2_HOST_WRITE_BIT:
/* We're transitioning a buffer for access by CPU. Invalidate
* all the caches. Since data and tile caches don't have invalidate,
* we are forced to flush those as well.
*/
pipe_bits |= ANV_PIPE_FLUSH_BITS;
pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
break;
case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
/* We're transitioning a buffer written either from VS stage or from
* the command streamer (see CmdEndTransformFeedbackEXT), we just
* need to stall the CS.
*
* Streamout writes apparently bypassing L3, in order to make them
* visible to the destination, we need to invalidate the other
* caches.
*/
pipe_bits |= ANV_PIPE_CS_STALL_BIT | ANV_PIPE_INVALIDATE_BITS;
break;
default:
break; /* Nothing to do */
}
}
return pipe_bits;
}
static inline enum anv_pipe_bits
anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
VkAccessFlags2 flags)
{
enum anv_pipe_bits pipe_bits = 0;
u_foreach_bit64(b, flags) {
switch ((VkAccessFlags2)BITFIELD64_BIT(b)) {
case VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT:
/* Indirect draw commands take a buffer as input that we're going to
* read from the command streamer to load some of the HW registers
* (see genX_cmd_buffer.c:load_indirect_parameters). This requires a
* command streamer stall so that all the cache flushes have
* completed before the command streamer loads from memory.
*/
pipe_bits |= ANV_PIPE_CS_STALL_BIT;
/* Indirect draw commands also set gl_BaseVertex & gl_BaseIndex
* through a vertex buffer, so invalidate that cache.
*/
pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
/* For CmdDipatchIndirect, we also load gl_NumWorkGroups through a
* UBO from the buffer, so we need to invalidate constant cache.
*/
pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
/* Tile cache flush needed For CmdDipatchIndirect since command
* streamer and vertex fetch aren't L3 coherent.
*/
pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_2_INDEX_READ_BIT:
case VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT:
/* We transitioning a buffer to be used for as input for vkCmdDraw*
* commands, so we invalidate the VF cache to make sure there is no
* stale data when we start rendering.
*/
pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
break;
case VK_ACCESS_2_UNIFORM_READ_BIT:
case VK_ACCESS_2_SHADER_BINDING_TABLE_READ_BIT_KHR:
/* We transitioning a buffer to be used as uniform data. Because
* uniform is accessed through the data port & sampler, we need to
* invalidate the texture cache (sampler) & constant cache (data
* port) to avoid stale data.
*/
pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
if (device->physical->compiler->indirect_ubos_use_sampler) {
pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
} else {
pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
}
break;
case VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT:
case VK_ACCESS_2_TRANSFER_READ_BIT:
case VK_ACCESS_2_SHADER_SAMPLED_READ_BIT:
/* Transitioning a buffer to be read through the sampler, so
* invalidate the texture cache, we don't want any stale data.
*/
pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
break;
case VK_ACCESS_2_SHADER_READ_BIT:
/* Same as VK_ACCESS_2_UNIFORM_READ_BIT and
* VK_ACCESS_2_SHADER_SAMPLED_READ_BIT cases above
*/
pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT |
ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
if (!device->physical->compiler->indirect_ubos_use_sampler) {
pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
}
break;
case VK_ACCESS_2_MEMORY_READ_BIT:
/* Transitioning a buffer for generic read, invalidate all the
* caches.
*/
pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
break;
case VK_ACCESS_2_MEMORY_WRITE_BIT:
/* Generic write, make sure all previously written things land in
* memory.
*/
pipe_bits |= ANV_PIPE_FLUSH_BITS;
break;
case VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT:
case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT:
/* Transitioning a buffer for conditional rendering or transform
* feedback. We'll load the content of this buffer into HW registers
* using the command streamer, so we need to stall the command
* streamer , so we need to stall the command streamer to make sure
* any in-flight flush operations have completed.
*/
pipe_bits |= ANV_PIPE_CS_STALL_BIT;
pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_2_HOST_READ_BIT:
/* We're transitioning a buffer that was written by CPU. Flush
* all the caches.
*/
pipe_bits |= ANV_PIPE_FLUSH_BITS;
break;
case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
/* We're transitioning a buffer to be written by the streamout fixed
* function. This one is apparently not L3 coherent, so we need a
* tile cache flush to make sure any previous write is not going to
* create WaW hazards.
*/
pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_2_SHADER_STORAGE_READ_BIT:
/* VK_ACCESS_2_SHADER_STORAGE_READ_BIT specifies read access to a
* storage buffer, physical storage buffer, storage texel buffer, or
* storage image in any shader pipeline stage.
*
* Any storage buffers or images written to must be invalidated and
* flushed before the shader can access them.
*
* Both HDC & Untyped flushes also do invalidation. This is why we use
* this here on Gfx12+.
*
* Gfx11 and prior don't have HDC. Only Data cache flush is available
* and it only operates on the written cache lines.
*/
if (device->info->ver >= 12) {
pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
}
break;
default:
break; /* Nothing to do */
}
}
return pipe_bits;
}
static inline bool
stage_is_shader(const VkPipelineStageFlags2 stage)
{