anv: add current_pipeline for batch_emit_pipe_control

This way we can implement workarounds depending on the pipeline.

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25671>
This commit is contained in:
Tapani Pälli 2023-10-23 09:49:58 +03:00 committed by Marge Bot
parent 8ffc4bd31c
commit 2254eaa3ae
6 changed files with 69 additions and 27 deletions

View file

@ -216,6 +216,7 @@ genX(ray_tracing_pipeline_emit)(struct anv_ray_tracing_pipeline *pipeline);
void
genX(batch_set_preemption)(struct anv_batch *batch,
const struct intel_device_info *devinfo,
uint32_t current_pipeline,
bool value);
void
@ -224,23 +225,25 @@ genX(cmd_buffer_set_preemption)(struct anv_cmd_buffer *cmd_buffer, bool value);
void
genX(batch_emit_pipe_control)(struct anv_batch *batch,
const struct intel_device_info *devinfo,
uint32_t current_pipeline,
enum anv_pipe_bits bits,
const char *reason);
void
genX(batch_emit_pipe_control_write)(struct anv_batch *batch,
const struct intel_device_info *devinfo,
uint32_t current_pipeline,
uint32_t post_sync_op,
struct anv_address address,
uint32_t imm_data,
enum anv_pipe_bits bits,
const char *reason);
#define genx_batch_emit_pipe_control(a, b, c) \
genX(batch_emit_pipe_control) (a, b, c, __func__)
#define genx_batch_emit_pipe_control(a, b, c, d) \
genX(batch_emit_pipe_control) (a, b, c, d, __func__)
#define genx_batch_emit_pipe_control_write(a, b, c, d, e, f) \
genX(batch_emit_pipe_control_write) (a, b, c, d, e, f, __func__)
#define genx_batch_emit_pipe_control_write(a, b, c, d, e, f, g) \
genX(batch_emit_pipe_control_write) (a, b, c, d, e, f, g, __func__)
void genX(batch_emit_breakpoint)(struct anv_batch *batch,
struct anv_device *device,

View file

@ -110,6 +110,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
#if GFX_VERx10 >= 125
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_CS_STALL_BIT);
anv_batch_emit(
&cmd_buffer->batch, GENX(3DSTATE_BINDING_TABLE_POOL_ALLOC), btpa) {
@ -128,6 +129,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
*/
genx_batch_emit_pipe_control
(&cmd_buffer->batch, cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
#if GFX_VER >= 12
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
#else
@ -301,6 +303,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
bits |= ANV_PIPE_CS_STALL_BIT;
#endif
genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
bits);
}
@ -1331,6 +1334,7 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
* which involves a first PIPE_CONTROL flush which stalls the pipeline...
*/
genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_DATA_CACHE_FLUSH_BIT |
ANV_PIPE_CS_STALL_BIT);
@ -1349,6 +1353,7 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
* (see SKL HSD 2132585).
*/
genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT |
ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT |
ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT |
@ -1358,6 +1363,7 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
* complete when the L3 configuration registers are modified.
*/
genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_DATA_CACHE_FLUSH_BIT |
ANV_PIPE_CS_STALL_BIT);
@ -1588,8 +1594,8 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch,
}
/* Flush PC. */
genx_batch_emit_pipe_control_write(batch, device->info, sync_op, addr,
0, flush_bits);
genx_batch_emit_pipe_control_write(batch, device->info, current_pipeline,
sync_op, addr, 0, flush_bits);
/* If the caller wants to know what flushes have been emitted,
* provide the bits based off the PIPE_CONTROL programmed bits.
@ -1644,8 +1650,8 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch,
}
/* Invalidate PC. */
genx_batch_emit_pipe_control_write(batch, device->info, sync_op, addr,
0, bits);
genx_batch_emit_pipe_control_write(batch, device->info, current_pipeline,
sync_op, addr, 0, bits);
#if GFX_VER == 12
if ((bits & ANV_PIPE_AUX_TABLE_INVALIDATE_BIT) && device->info->has_aux_map) {
@ -2821,11 +2827,13 @@ cmd_buffer_flush_mesh_inline_data(struct anv_cmd_buffer *cmd_buffer,
ALWAYS_INLINE void
genX(batch_emit_pipe_control)(struct anv_batch *batch,
const struct intel_device_info *devinfo,
uint32_t current_pipeline,
enum anv_pipe_bits bits,
const char *reason)
{
genX(batch_emit_pipe_control_write)(batch,
devinfo,
current_pipeline,
NoWrite,
ANV_NULL_ADDRESS,
0,
@ -2836,6 +2844,7 @@ genX(batch_emit_pipe_control)(struct anv_batch *batch,
ALWAYS_INLINE void
genX(batch_emit_pipe_control_write)(struct anv_batch *batch,
const struct intel_device_info *devinfo,
uint32_t current_pipeline,
uint32_t post_sync_op,
struct anv_address address,
uint32_t imm_data,
@ -2908,6 +2917,7 @@ genX(batch_emit_pipe_control_write)(struct anv_batch *batch,
void
genX(batch_set_preemption)(struct anv_batch *batch,
const struct intel_device_info *devinfo,
uint32_t current_pipeline,
bool value)
{
#if GFX_VERx10 >= 120
@ -2917,7 +2927,8 @@ genX(batch_set_preemption)(struct anv_batch *batch,
}
/* Wa_16013994831 - we need to insert CS_STALL and 250 noops. */
genx_batch_emit_pipe_control(batch, devinfo, ANV_PIPE_CS_STALL_BIT);
genx_batch_emit_pipe_control(batch, devinfo, current_pipeline,
ANV_PIPE_CS_STALL_BIT);
for (unsigned i = 0; i < 250; i++)
anv_batch_emit(batch, GENX(MI_NOOP), noop);
@ -2932,6 +2943,7 @@ genX(cmd_buffer_set_preemption)(struct anv_cmd_buffer *cmd_buffer, bool value)
return;
genX(batch_set_preemption)(&cmd_buffer->batch, cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
value);
cmd_buffer->state.gfx.object_preemption = value;
#endif
@ -3452,6 +3464,7 @@ emit_isp_disable(struct anv_cmd_buffer *cmd_buffer)
{
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
@ -6274,6 +6287,7 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
cmd_buffer->queue_family->engine_class == INTEL_ENGINE_CLASS_COMPUTE) {
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT |
ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT |
@ -6961,6 +6975,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
if (intel_needs_workaround(cmd_buffer->device->info, 14016712196)) {
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT);
}
@ -6983,7 +6998,8 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
* This also seems sufficient to handle Wa_14014097488.
*/
genx_batch_emit_pipe_control_write
(&cmd_buffer->batch, cmd_buffer->device->info, WriteImmediateData,
(&cmd_buffer->batch, cmd_buffer->device->info,
cmd_buffer->state.current_pipeline, WriteImmediateData,
cmd_buffer->device->workaround_address, 0, 0);
}
}
@ -7031,6 +7047,7 @@ cmd_buffer_emit_cps_control_buffer(struct anv_cmd_buffer *cmd_buffer,
if (intel_needs_workaround(cmd_buffer->device->info, 14016712196)) {
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT);
}
#endif /* GFX_VERx10 >= 125 */
@ -7896,7 +7913,8 @@ void genX(CmdSetEvent2)(
}
genx_batch_emit_pipe_control_write
(&cmd_buffer->batch, cmd_buffer->device->info, WriteImmediateData,
(&cmd_buffer->batch, cmd_buffer->device->info,
cmd_buffer->state.current_pipeline, WriteImmediateData,
anv_state_pool_state_address(&cmd_buffer->device->dynamic_state_pool,
event->state),
VK_EVENT_SET, pc_bits);
@ -7931,7 +7949,8 @@ void genX(CmdResetEvent2)(
}
genx_batch_emit_pipe_control_write
(&cmd_buffer->batch, cmd_buffer->device->info, WriteImmediateData,
(&cmd_buffer->batch, cmd_buffer->device->info,
cmd_buffer->state.current_pipeline, WriteImmediateData,
anv_state_pool_state_address(&cmd_buffer->device->dynamic_state_pool,
event->state),
VK_EVENT_RESET,
@ -8093,7 +8112,7 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch,
fd.Address = addr;
}
} else {
genx_batch_emit_pipe_control_write(batch, device->info,
genx_batch_emit_pipe_control_write(batch, device->info, 0,
WriteTimestamp, addr, 0, 0);
}
break;
@ -8101,7 +8120,7 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch,
case ANV_TIMESTAMP_CAPTURE_AT_CS_STALL:
genx_batch_emit_pipe_control_write
(batch, device->info, WriteTimestamp, addr, 0,
(batch, device->info, 0, WriteTimestamp, addr, 0,
ANV_PIPE_CS_STALL_BIT);
break;
@ -8149,7 +8168,7 @@ genX(batch_emit_dummy_post_sync_op)(struct anv_batch *batch,
primitive_topology == _3DPRIM_LINESTRIP_CONT_BF) &&
(vertex_count == 1 || vertex_count == 2)) {
genx_batch_emit_pipe_control_write
(batch, device->info, WriteImmediateData,
(batch, device->info, 0, WriteImmediateData,
device->workaround_address, 0, 0);
}

View file

@ -1232,6 +1232,7 @@ genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer)
* On DG2+ also known as Wa_1509820217.
*/
genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_CS_STALL_BIT);
#endif
}
@ -1577,6 +1578,7 @@ genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer)
*/
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_CS_STALL_BIT);
#endif
}
@ -1702,6 +1704,7 @@ genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer)
#if GFX_VERx10 >= 125
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WA_18019816803)) {
genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_PSS_STALL_SYNC_BIT);
}
#endif
@ -1736,6 +1739,7 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
*/
genx_batch_emit_pipe_control
(&cmd_buffer->batch, cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
ANV_PIPE_CS_STALL_BIT |
#if GFX_VER >= 12
@ -1764,6 +1768,7 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
*/
genx_batch_emit_pipe_control
(&cmd_buffer->batch, cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_DEPTH_STALL_BIT |
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
#if GFX_VER >= 12

View file

@ -83,7 +83,7 @@ emit_common_so_memcpy(struct anv_batch *batch, struct anv_device *device,
/* Wa_16013994831 - Disable preemption during streamout. */
if (intel_needs_workaround(device->info, 16013994831))
genX(batch_set_preemption)(batch, device->info, false);
genX(batch_set_preemption)(batch, device->info, _3D, false);
#endif
anv_batch_emit(batch, GENX(3DSTATE_SBE), sbe) {
@ -174,7 +174,7 @@ emit_so_memcpy(struct anv_batch *batch, struct anv_device *device,
* state is not combined with other state changes.
*/
if (intel_needs_workaround(device->info, 16011411144))
genx_batch_emit_pipe_control(batch, device->info, ANV_PIPE_CS_STALL_BIT);
genx_batch_emit_pipe_control(batch, device->info, _3D, ANV_PIPE_CS_STALL_BIT);
anv_batch_emit(batch, GENX(3DSTATE_SO_BUFFER), sob) {
#if GFX_VER < 12
@ -200,7 +200,7 @@ emit_so_memcpy(struct anv_batch *batch, struct anv_device *device,
/* Wa_16011411144: also CS_STALL after touching SO_BUFFER change */
if (intel_needs_workaround(device->info, 16011411144))
genx_batch_emit_pipe_control(batch, device->info, ANV_PIPE_CS_STALL_BIT);
genx_batch_emit_pipe_control(batch, device->info, _3D, ANV_PIPE_CS_STALL_BIT);
dw = anv_batch_emitn(batch, 5, GENX(3DSTATE_SO_DECL_LIST),
.StreamtoBufferSelects0 = (1 << 0),
@ -216,7 +216,7 @@ emit_so_memcpy(struct anv_batch *batch, struct anv_device *device,
#if GFX_VERx10 == 125
/* Wa_14015946265: Send PC with CS stall after SO_DECL. */
genx_batch_emit_pipe_control(batch, device->info, ANV_PIPE_CS_STALL_BIT);
genx_batch_emit_pipe_control(batch, device->info, _3D, ANV_PIPE_CS_STALL_BIT);
#endif
anv_batch_emit(batch, GENX(3DSTATE_STREAMOUT), so) {
@ -273,7 +273,7 @@ void
genX(emit_so_memcpy_end)(struct anv_memcpy_state *state)
{
if (intel_device_info_is_dg2(state->device->info))
genX(batch_set_preemption)(state->batch, state->device->info, true);
genX(batch_set_preemption)(state->batch, state->device->info, _3D, true);
anv_batch_emit(state->batch, GENX(MI_BATCH_BUFFER_END), end);

View file

@ -210,6 +210,7 @@ init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch)
* both the cases set Render Target Cache Flush Enable".
*/
genx_batch_emit_pipe_control(batch, device->info,
0,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT);
#endif
@ -573,7 +574,7 @@ init_render_queue_state(struct anv_queue *queue, bool is_companion_rcs_batch)
anv_batch_emit(&batch, GENX(STATE_COMPUTE_MODE), zero);
anv_batch_emit(&batch, GENX(3DSTATE_MESH_CONTROL), zero);
anv_batch_emit(&batch, GENX(3DSTATE_TASK_CONTROL), zero);
genx_batch_emit_pipe_control_write(&batch, device->info, NoWrite,
genx_batch_emit_pipe_control_write(&batch, device->info, _3D, NoWrite,
ANV_NULL_ADDRESS,
0,
ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS);
@ -582,7 +583,7 @@ init_render_queue_state(struct anv_queue *queue, bool is_companion_rcs_batch)
cfe.MaximumNumberofThreads =
devinfo->max_cs_threads * devinfo->subslice_total;
}
genx_batch_emit_pipe_control_write(&batch, device->info, NoWrite,
genx_batch_emit_pipe_control_write(&batch, device->info, _3D, NoWrite,
ANV_NULL_ADDRESS,
0,
ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS);
@ -633,7 +634,7 @@ init_compute_queue_state(struct anv_queue *queue)
*/
if (intel_needs_workaround(devinfo, 14015782607) &&
queue->family->engine_class == INTEL_ENGINE_CLASS_COMPUTE) {
genx_batch_emit_pipe_control(&batch, devinfo,
genx_batch_emit_pipe_control(&batch, devinfo, GPGPU,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT |
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT);
@ -646,7 +647,7 @@ init_compute_queue_state(struct anv_queue *queue)
if (intel_device_info_is_atsm(devinfo) &&
queue->family->engine_class == INTEL_ENGINE_CLASS_COMPUTE) {
genx_batch_emit_pipe_control
(&batch, devinfo,
(&batch, devinfo, GPGPU,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT |
ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT |
@ -1181,6 +1182,7 @@ genX(apply_task_urb_workaround)(struct anv_cmd_buffer *cmd_buffer)
/* Issue 'nullprim' to commit the state. */
genx_batch_emit_pipe_control_write
(&cmd_buffer->batch, cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
WriteImmediateData, cmd_buffer->device->workaround_address, 0, 0);
#endif
}

View file

@ -673,7 +673,8 @@ emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer,
bool cs_stall_needed = (GFX_VER == 9 && cmd_buffer->device->info->gt == 4);
genx_batch_emit_pipe_control_write
(&cmd_buffer->batch, cmd_buffer->device->info, WritePSDepthCount, addr, 0,
(&cmd_buffer->batch, cmd_buffer->device->info,
cmd_buffer->state.current_pipeline, WritePSDepthCount, addr, 0,
ANV_PIPE_DEPTH_STALL_BIT | (cs_stall_needed ? ANV_PIPE_CS_STALL_BIT : 0));
}
@ -694,7 +695,8 @@ emit_query_pc_availability(struct anv_cmd_buffer *cmd_buffer,
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
genx_batch_emit_pipe_control_write
(&cmd_buffer->batch, cmd_buffer->device->info, WriteImmediateData, addr,
(&cmd_buffer->batch, cmd_buffer->device->info,
cmd_buffer->state.current_pipeline, WriteImmediateData, addr,
available, ANV_PIPE_CS_STALL_BIT);
}
@ -1023,6 +1025,7 @@ void genX(CmdBeginQueryIndexedEXT)(
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
mi_store(&b, mi_mem64(anv_address_add(query_addr, 8)),
@ -1033,6 +1036,7 @@ void genX(CmdBeginQueryIndexedEXT)(
/* TODO: This might only be necessary for certain stats */
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
@ -1049,6 +1053,7 @@ void genX(CmdBeginQueryIndexedEXT)(
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
emit_xfb_query(&b, index, anv_address_add(query_addr, 8));
@ -1108,6 +1113,7 @@ void genX(CmdBeginQueryIndexedEXT)(
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
cmd_buffer->perf_query_pool = pool;
@ -1170,6 +1176,7 @@ void genX(CmdBeginQueryIndexedEXT)(
case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
emit_perf_intel_query(cmd_buffer, pool, &b, query_addr, false);
@ -1210,6 +1217,7 @@ void genX(CmdEndQueryIndexedEXT)(
*/
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
@ -1222,6 +1230,7 @@ void genX(CmdEndQueryIndexedEXT)(
/* TODO: This might only be necessary for certain stats */
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
@ -1240,6 +1249,7 @@ void genX(CmdEndQueryIndexedEXT)(
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
emit_xfb_query(&b, index, anv_address_add(query_addr, 16));
@ -1249,6 +1259,7 @@ void genX(CmdEndQueryIndexedEXT)(
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
cmd_buffer->perf_query_pool = pool;
@ -1327,6 +1338,7 @@ void genX(CmdEndQueryIndexedEXT)(
case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
cmd_buffer->state.current_pipeline,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
uint32_t marker_offset = intel_perf_marker_offset();
@ -1392,7 +1404,8 @@ void genX(CmdWriteTimestamp2)(
bool cs_stall_needed =
(GFX_VER == 9 && cmd_buffer->device->info->gt == 4);
genx_batch_emit_pipe_control_write
(&cmd_buffer->batch, cmd_buffer->device->info, WriteTimestamp,
(&cmd_buffer->batch, cmd_buffer->device->info,
cmd_buffer->state.current_pipeline, WriteTimestamp,
anv_address_add(query_addr, 8), 0,
cs_stall_needed ? ANV_PIPE_CS_STALL_BIT : 0);