anv: add tracking of involved stages in pipe flushes

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38707>
This commit is contained in:
Lionel Landwerlin 2024-10-21 12:02:51 +03:00 committed by Marge Bot
parent 4e8a25cf6f
commit f2c571fabf
11 changed files with 198 additions and 30 deletions

View file

@ -667,7 +667,12 @@ void anv_CmdCopyImage2(
anv_cmd_buffer_is_compute_queue(cmd_buffer) ?
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT :
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
anv_add_pending_pipe_bits(cmd_buffer, pipe_bits,
anv_add_pending_pipe_bits(cmd_buffer,
(batch.flags & BLORP_BATCH_USE_COMPUTE) ?
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT :
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
pipe_bits,
"Copy flush before astc emu");
for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
@ -819,7 +824,12 @@ void anv_CmdCopyBufferToImage2(
anv_cmd_buffer_is_compute_queue(cmd_buffer) ?
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT :
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
anv_add_pending_pipe_bits(cmd_buffer, pipe_bits,
anv_add_pending_pipe_bits(cmd_buffer,
(batch.flags & BLORP_BATCH_USE_COMPUTE) ?
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT :
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
pipe_bits,
"Copy flush before astc emu");
for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
@ -1177,6 +1187,8 @@ anv_cmd_buffer_update_addr(
* texture cache so we don't get anything stale.
*/
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_HOST_BIT,
VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT,
ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,
"before UpdateBuffer");
@ -1886,6 +1898,8 @@ anv_fast_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
* hangs when doing a clear with WM_HZ_OP.
*/
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
ANV_PIPE_DEPTH_STALL_BIT,
"before clear hiz");
@ -1913,6 +1927,8 @@ anv_fast_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
unsigned wa_flush = cmd_buffer->device->info->verx10 >= 125 ?
ANV_PIPE_DATA_CACHE_FLUSH_BIT : 0;
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_TILE_CACHE_FLUSH_BIT |
@ -1955,6 +1971,8 @@ anv_fast_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
*/
if (cmd_buffer->device->info->verx10 < 120) {
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
ANV_PIPE_DEPTH_STALL_BIT,
"after clear hiz");
@ -2565,6 +2583,8 @@ anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
* cache before rendering to it.
*/
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
ANV_PIPE_END_OF_PIPE_SYNC_BIT,
"before clear DS");
@ -2584,6 +2604,8 @@ anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
* cache before someone starts trying to do stencil on it.
*/
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
ANV_PIPE_END_OF_PIPE_SYNC_BIT,
"after clear DS");

View file

@ -1796,6 +1796,8 @@ anv_begin_companion_cmd_buffer_helper(struct anv_cmd_buffer **cmd_buffer,
*/
if (prev_cmd_buffer->device->info->has_aux_map) {
anv_add_pending_pipe_bits(prev_cmd_buffer->companion_rcs_cmd_buffer,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_AUX_TABLE_INVALIDATE_BIT,
"new cmd buffer with aux-tt");
}

View file

@ -4633,6 +4633,8 @@ struct anv_cmd_state {
struct anv_cmd_compute_state compute;
struct anv_cmd_ray_tracing_state rt;
VkPipelineStageFlags2 pending_src_stages;
VkPipelineStageFlags2 pending_dst_stages;
enum anv_pipe_bits pending_pipe_bits;
/**
@ -6766,21 +6768,30 @@ anv_dump_pipe_bits(enum anv_pipe_bits bits, struct log_stream *stream);
void
anv_cmd_buffer_pending_pipe_debug(struct anv_cmd_buffer *cmd_buffer,
VkPipelineStageFlags2 src_stages,
VkPipelineStageFlags2 dst_stages,
enum anv_pipe_bits bits,
const char* reason);
static inline void
anv_add_pending_pipe_bits(struct anv_cmd_buffer* cmd_buffer,
VkPipelineStageFlags2 src_stages,
VkPipelineStageFlags2 dst_stages,
enum anv_pipe_bits bits,
const char* reason)
{
cmd_buffer->state.pending_src_stages |= src_stages;
cmd_buffer->state.pending_dst_stages |= dst_stages;
cmd_buffer->state.pending_pipe_bits |= bits;
if (unlikely(u_trace_enabled(&cmd_buffer->device->ds.trace_context))) {
if (cmd_buffer->batch.pc_reasons_count < ARRAY_SIZE(cmd_buffer->batch.pc_reasons))
cmd_buffer->batch.pc_reasons[cmd_buffer->batch.pc_reasons_count++] = reason;
}
if (INTEL_DEBUG(DEBUG_PIPE_CONTROL))
anv_cmd_buffer_pending_pipe_debug(cmd_buffer, bits, reason);
if (INTEL_DEBUG(DEBUG_PIPE_CONTROL)) {
anv_cmd_buffer_pending_pipe_debug(cmd_buffer,
src_stages, dst_stages, bits,
reason);
}
}
struct anv_performance_configuration_intel {

View file

@ -75,19 +75,34 @@ __anv_perf_warn(struct anv_device *device,
void
anv_cmd_buffer_pending_pipe_debug(struct anv_cmd_buffer *cmd_buffer,
VkPipelineStageFlags2 src_stages,
VkPipelineStageFlags2 dst_stages,
enum anv_pipe_bits bits,
const char* reason)
{
if (bits == 0)
if (bits == 0 && src_stages == 0 && dst_stages == 0)
return;
struct log_stream *stream = mesa_log_streami();
mesa_log_stream_printf(stream, "acc: ");
mesa_log_stream_printf(stream, "bits: ");
mesa_log_stream_printf(stream, "src: ");
u_foreach_bit64(b, src_stages) {
mesa_log_stream_printf(stream, "%s,",
vk_PipelineStageFlagBits2_to_str(BITFIELD_BIT(b)) +
strlen("VK_PIPELINE_STAGE_2_"));
}
mesa_log_stream_printf(stream, " dst: ");
u_foreach_bit64(b, dst_stages) {
mesa_log_stream_printf(stream, "%s,",
vk_PipelineStageFlagBits2_to_str(BITFIELD_BIT(b)) +
strlen("VK_PIPELINE_STAGE_2_"));
}
mesa_log_stream_printf(stream, " bits: ");
anv_dump_pipe_bits(bits, stream);
mesa_log_stream_printf(stream, "reason: %s", reason);
mesa_log_stream_printf(stream, " reason: %s", reason);
mesa_log_stream_printf(stream, "\n");

View file

@ -437,7 +437,10 @@ anv_init_header(VkCommandBuffer commandBuffer, const struct vk_acceleration_stru
* dispatch size paramters) is not L3 coherent.
*/
if (!ANV_DEVINFO_HAS_COHERENT_L3_CS(cmd_buffer->device->info)) {
anv_add_pending_pipe_bits(cmd_buffer, ANV_PIPE_DATA_CACHE_FLUSH_BIT,
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR,
ANV_PIPE_DATA_CACHE_FLUSH_BIT,
"copy dispatch size for dispatch");
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
}
@ -670,7 +673,10 @@ genX(CmdCopyAccelerationStructureKHR)(
* dispatch paramters) is not L3 coherent.
*/
if (!ANV_DEVINFO_HAS_COHERENT_L3_CS(cmd_buffer->device->info)) {
anv_add_pending_pipe_bits(cmd_buffer, ANV_PIPE_DATA_CACHE_FLUSH_BIT,
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_TRANSFER_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_DATA_CACHE_FLUSH_BIT,
"bvh size read for dispatch");
}
@ -720,6 +726,8 @@ genX(CmdCopyAccelerationStructureToMemoryKHR)(
*/
if (!ANV_DEVINFO_HAS_COHERENT_L3_CS(cmd_buffer->device->info)) {
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_TRANSFER_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_DATA_CACHE_FLUSH_BIT,
"bvh size read for dispatch");
}

View file

@ -313,6 +313,8 @@ blorp_exec_on_render(struct blorp_batch *batch,
*/
if (blorp_uses_bti_rt_writes(batch, params)) {
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT,
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
"before blorp BTI change");
@ -380,6 +382,8 @@ blorp_exec_on_render(struct blorp_batch *batch,
*/
if (blorp_uses_bti_rt_writes(batch, params)) {
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT,
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
"after blorp BTI change");

View file

@ -587,6 +587,8 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
image->planes[depth_plane].aux_usage == ISL_AUX_USAGE_HIZ_CCS &&
final_needs_depth && !initial_depth_valid) {
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_TILE_CACHE_FLUSH_BIT,
"HIZ-CCS flush");
}
@ -658,6 +660,8 @@ transition_stencil_buffer(struct anv_cmd_buffer *cmd_buffer,
*/
if (intel_device_info_is_mtl(cmd_buffer->device->info)) {
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_TILE_CACHE_FLUSH_BIT,
"HIZ-CCS flush");
}
@ -936,6 +940,8 @@ genX(cmd_buffer_load_clear_color)(struct anv_cmd_buffer *cmd_buffer,
* In testing, SKL doesn't actually seem to need this, but HSW does.
*/
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT,
"after load_clear_color surface state update");
#endif
@ -1872,6 +1878,8 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
cmd_buffer->state.pending_rhwo_optimization_enabled;
if (rhwo_opt_change) {
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_STALL_AT_SCOREBOARD_BIT |
ANV_PIPE_END_OF_PIPE_SYNC_BIT,
"change RHWO optimization");
@ -1880,8 +1888,12 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
enum anv_pipe_bits bits = cmd_buffer->state.pending_pipe_bits;
/* Consume the stages here */
cmd_buffer->state.pending_src_stages = 0;
cmd_buffer->state.pending_dst_stages = 0;
if (unlikely(cmd_buffer->device->physical->always_flush_cache))
bits |= ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS;
bits |= ANV_PIPE_BARRIER_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS;
else if (bits == 0)
return;
@ -1924,8 +1936,7 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
genX(emit_apply_pipe_flushes)(&cmd_buffer->batch,
cmd_buffer->device,
cmd_buffer->state.current_pipeline,
bits,
&emitted_bits);
bits, &emitted_bits);
anv_cmd_buffer_update_pending_query_bits(cmd_buffer, emitted_bits);
#if INTEL_NEEDS_WA_1508744258
@ -2892,6 +2903,8 @@ genX(cmd_buffer_begin_companion)(struct anv_cmd_buffer *cmd_buffer,
if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY &&
cmd_buffer->device->info->has_aux_map) {
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_AUX_TABLE_INVALIDATE_BIT,
"new cmd buffer with aux-tt");
}
@ -2935,7 +2948,12 @@ add_pending_pipe_bits_for_color_aux_op(struct anv_cmd_buffer *cmd_buffer,
assert(ret < sizeof(flush_reason));
}
anv_add_pending_pipe_bits(cmd_buffer, pipe_bits, flush_reason);
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
aux_op_clears(next_aux_op) ?
VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT :
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
pipe_bits, flush_reason);
}
void
@ -3146,6 +3164,8 @@ genX(cmd_buffer_update_color_aux_op)(struct anv_cmd_buffer *cmd_buffer,
* cache invalidation with the texture cache invalidation done on gfx12.
*/
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT,
"Invalidate for new clear color");
}
@ -3267,6 +3287,8 @@ genX(BeginCommandBuffer)(
if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY &&
cmd_buffer->device->info->has_aux_map) {
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_AUX_TABLE_INVALIDATE_BIT,
"new cmd buffer with aux-tt");
}
@ -3294,6 +3316,8 @@ genX(BeginCommandBuffer)(
if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY &&
cmd_buffer->device->info->has_aux_map) {
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_AUX_TABLE_INVALIDATE_BIT,
"new cmd buffer with aux-tt");
}
@ -3454,6 +3478,8 @@ end_command_buffer(struct anv_cmd_buffer *cmd_buffer)
*/
if (cmd_buffer->state.queries.clear_bits) {
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_QUERY_BITS(cmd_buffer->state.queries.clear_bits),
"query clear flush prior command buffer end");
}
@ -3563,6 +3589,8 @@ genX(CmdExecuteCommands)(
*/
if (container->state.queries.clear_bits) {
anv_add_pending_pipe_bits(container,
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_QUERY_BITS(container->state.queries.clear_bits),
"query clear flush prior to secondary buffer");
}
@ -3710,6 +3738,8 @@ genX(CmdExecuteCommands)(
*/
if (GFX_VER == 9) {
anv_add_pending_pipe_bits(container,
VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_CS_STALL_BIT | ANV_PIPE_VF_CACHE_INVALIDATE_BIT,
"Secondary cmd buffer not tracked in VF cache");
}
@ -4707,6 +4737,9 @@ cmd_buffer_accumulate_barrier_bits(struct anv_cmd_buffer *cmd_buffer,
* dataport.
*/
if (flush_query_copies) {
src_stages |= VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT |
VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT;
dst_stages |= VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT;
bits |= (GFX_VER >= 12 ?
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT : ANV_PIPE_DATA_CACHE_FLUSH_BIT);
}
@ -4741,7 +4774,7 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
cmd_buffer_accumulate_barrier_bits(cmd_buffer, n_dep_infos, dep_infos,
&src_stages, &dst_stages, &bits);
anv_add_pending_pipe_bits(cmd_buffer, bits, reason);
anv_add_pending_pipe_bits(cmd_buffer, src_stages, dst_stages, bits, reason);
break;
}
@ -4872,6 +4905,8 @@ genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer,
if (cmd_buffer->state.current_pipeline == _3D &&
cmd_buffer->state.queries.clear_bits) {
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_QUERY_BITS(cmd_buffer->state.queries.clear_bits),
"query clear flush prior to GPGPU");
}
@ -4938,7 +4973,10 @@ genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer,
intel_needs_workaround(cmd_buffer->device->info, 16013063087))
bits |= ANV_PIPE_STATE_CACHE_INVALIDATE_BIT;
anv_add_pending_pipe_bits(cmd_buffer, bits,
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
bits,
pipeline == _3D ?
"flush/invalidate PIPELINE_SELECT 3D" :
"flush/invalidate PIPELINE_SELECT GPGPU");
@ -5048,6 +5086,8 @@ genX(cmd_buffer_emit_gfx12_depth_wa)(struct anv_cmd_buffer *cmd_buffer,
* settings while we change the registers.
*/
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
ANV_PIPE_DEPTH_STALL_BIT |
ANV_PIPE_END_OF_PIPE_SYNC_BIT,
@ -5123,6 +5163,8 @@ genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(struct anv_cmd_buffer *cmd_buffer
vb_address,
vb_size)) {
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_VF_CACHE_INVALIDATE_BIT,
"vb > 32b range");
@ -5232,6 +5274,8 @@ genX(cmd_buffer_emit_hashing_mode)(struct anv_cmd_buffer *cmd_buffer,
if (cmd_buffer->state.current_hash_scale != scale &&
(width > min_size[idx][0] || height > min_size[idx][1])) {
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
"change pixel hash mode");
@ -5940,9 +5984,11 @@ void genX(CmdBeginRendering)(
* in the case that there are no RTs (depth-only rendering), though.
*/
anv_add_pending_pipe_bits(cmd_buffer,
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
"change RT");
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
"change RT");
}
#endif
@ -6031,6 +6077,8 @@ void genX(CmdEndRendering2KHR)(
* sampler when we blit to the single-sampled resolve target.
*/
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT |
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT,
"MSAA resolve");
@ -6047,9 +6095,11 @@ void genX(CmdEndRendering2KHR)(
* sampler when we blit to the single-sampled resolve target.
*/
anv_add_pending_pipe_bits(cmd_buffer,
ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT |
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT,
"MSAA resolve");
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT |
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT,
"MSAA resolve");
}
#if GFX_VER < 20
@ -6078,7 +6128,10 @@ void genX(CmdEndRendering2KHR)(
* sure unbound regions read 0, as residencyNonResidentStrict
* mandates.
*/
anv_add_pending_pipe_bits(cmd_buffer, ANV_PIPE_TILE_CACHE_FLUSH_BIT,
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_TILE_CACHE_FLUSH_BIT,
"sparse MSAA resolve");
}
#endif
@ -6360,6 +6413,8 @@ VkResult genX(CmdSetPerformanceOverrideINTEL)(
if (pOverrideInfo->enable) {
/* FLUSH ALL THE THINGS! As requested by the MDAPI team. */
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_BARRIER_FLUSH_BITS |
ANV_PIPE_INVALIDATE_BITS,
"perf counter isolation");
@ -6599,9 +6654,12 @@ genX(cmd_buffer_begin_companion_rcs_syncpoint)(
*/
if (anv_cmd_buffer_is_compute_queue(cmd_buffer)) {
anv_add_pending_pipe_bits(cmd_buffer, ANV_PIPE_BARRIER_FLUSH_BITS |
ANV_PIPE_INVALIDATE_BITS |
ANV_PIPE_STALL_BITS,
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_BARRIER_FLUSH_BITS |
ANV_PIPE_INVALIDATE_BITS |
ANV_PIPE_STALL_BITS,
"post main cmd buffer invalidate");
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
} else if (anv_cmd_buffer_is_blitter_queue(cmd_buffer)) {
@ -6671,6 +6729,8 @@ genX(cmd_buffer_end_companion_rcs_syncpoint)(struct anv_cmd_buffer *cmd_buffer,
* - unblock the CCS
*/
anv_add_pending_pipe_bits(cmd_buffer->companion_rcs_cmd_buffer,
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_BARRIER_FLUSH_BITS |
ANV_PIPE_INVALIDATE_BITS |
ANV_PIPE_STALL_BITS,
@ -6817,7 +6877,10 @@ genX(CmdWriteBufferMarker2AMD)(VkCommandBuffer commandBuffer,
trace_intel_begin_write_buffer_marker(&cmd_buffer->trace);
anv_add_pending_pipe_bits(cmd_buffer, bits, "write buffer marker");
anv_add_pending_pipe_bits(cmd_buffer,
stage,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
bits, "write buffer marker");
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
struct mi_builder b;

View file

@ -136,8 +136,10 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
* sufficient."
*/
anv_add_pending_pipe_bits(cmd_buffer,
ANV_PIPE_CS_STALL_BIT,
"flush compute state");
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
ANV_PIPE_CS_STALL_BIT,
"flush compute state");
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
#endif

View file

@ -712,6 +712,8 @@ cmd_buffer_maybe_flush_rt_writes(struct anv_cmd_buffer *cmd_buffer,
* in the shader always send the color.
*/
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT,
"change RT due to shader outputs");
@ -854,6 +856,8 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer)
*/
if (intel_needs_workaround(device->info, 16011411144)) {
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT,
VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT,
ANV_PIPE_CS_STALL_BIT,
"before SO_BUFFER change WA");
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
@ -889,12 +893,16 @@ cmd_buffer_flush_gfx_state(struct anv_cmd_buffer *cmd_buffer)
if (intel_needs_workaround(device->info, 16011411144)) {
/* Wa_16011411144: also CS_STALL after touching SO_BUFFER change */
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT,
VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT,
ANV_PIPE_CS_STALL_BIT,
"after SO_BUFFER change WA");
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
} else if (GFX_VER >= 10) {
/* CNL and later require a CS stall after 3DSTATE_SO_BUFFER */
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT,
VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT,
ANV_PIPE_CS_STALL_BIT,
"after 3DSTATE_SO_BUFFER call");
}
@ -2365,6 +2373,8 @@ void genX(CmdBeginTransformFeedbackEXT)(
* commands are processed. This will likely require a pipeline flush."
*/
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_CS_STALL_BIT,
"begin transform feedback");
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
@ -2417,6 +2427,8 @@ void genX(CmdEndTransformFeedbackEXT)(
* commands are processed. This will likely require a pipeline flush."
*/
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_CS_STALL_BIT,
"end transform feedback");
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

View file

@ -548,6 +548,10 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
struct anv_gen_indirect_params *params = params_state.map;
anv_add_pending_pipe_bits(cmd_buffer,
gen_kernel->stage == MESA_SHADER_FRAGMENT ?
VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT :
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
#if GFX_VER == 9
ANV_PIPE_VF_CACHE_INVALIDATE_BIT |
#endif
@ -597,6 +601,10 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
anv_batch_current_address(&cmd_buffer->batch);
anv_add_pending_pipe_bits(cmd_buffer,
gen_kernel->stage == MESA_SHADER_FRAGMENT ?
VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT :
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_STALL_AT_SCOREBOARD_BIT |
ANV_PIPE_CS_STALL_BIT,
"after generated draws batch");
@ -623,6 +631,8 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
mi_ensure_write_fence(&b);
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT,
"after generated draws batch increment");
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
@ -645,6 +655,8 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
mi_ensure_write_fence(&b);
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT,
"after generated draws end");

View file

@ -917,7 +917,10 @@ void genX(CmdResetQueryPool)(
* completed. Otherwise some timestamps written later with MI_STORE_*
* commands might race with the PIPE_CONTROL in the loop above.
*/
anv_add_pending_pipe_bits(cmd_buffer, ANV_PIPE_CS_STALL_BIT,
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_CS_STALL_BIT,
"vkCmdResetQueryPool of timestamps");
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
break;
@ -1091,6 +1094,9 @@ append_query_clear_flush(struct anv_cmd_buffer *cmd_buffer,
return false;
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT |
VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_QUERY_BITS(
cmd_buffer->state.queries.clear_bits),
reason);
@ -1735,6 +1741,9 @@ copy_query_results_with_cs(struct anv_cmd_buffer *cmd_buffer,
if (needed_flushes) {
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT |
VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
needed_flushes,
"CopyQueryPoolResults");
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
@ -1847,6 +1856,7 @@ copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer,
uint32_t query_count,
VkQueryResultFlags flags)
{
VkPipelineStageFlags2 wait_stages = 0;
enum anv_pipe_bits needed_flushes = 0;
trace_intel_begin_query_copy_shader(&cmd_buffer->trace);
@ -1867,11 +1877,14 @@ copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer,
}
if ((cmd_buffer->state.queries.buffer_write_bits |
cmd_buffer->state.queries.clear_bits) & ANV_QUERY_WRITES_RT_FLUSH)
cmd_buffer->state.queries.clear_bits) & ANV_QUERY_WRITES_RT_FLUSH) {
wait_stages |= VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
needed_flushes |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
}
if ((cmd_buffer->state.queries.buffer_write_bits |
cmd_buffer->state.queries.clear_bits) & ANV_QUERY_WRITES_DATA_FLUSH) {
wait_stages |= VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT;
needed_flushes |= (ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT);
}
@ -1901,6 +1914,8 @@ copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer,
if (needed_flushes) {
anv_add_pending_pipe_bits(cmd_buffer,
wait_stages,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
needed_flushes | ANV_PIPE_END_OF_PIPE_SYNC_BIT,
"CopyQueryPoolResults");
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
@ -2071,6 +2086,8 @@ genX(CmdWriteAccelerationStructuresPropertiesKHR)(
*/
if (!ANV_DEVINFO_HAS_COHERENT_L3_CS(cmd_buffer->device->info)) {
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
ANV_PIPE_END_OF_PIPE_SYNC_BIT |
ANV_PIPE_DATA_CACHE_FLUSH_BIT,
"read BVH data using CS");