anv: predicate BTP emissions

The previous commit enabled different command buffers to program the
same 3DSTATE_BINDING_TABLE_POOL_ALLOC instruction even though they
allocated different chunks of binding tables.

Now we can just predicate this programming and skip the stalling,
flushing & invalidation.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39527>
This commit is contained in:
Lionel Landwerlin 2026-01-26 13:03:49 +02:00 committed by Marge Bot
parent 725c2a39d5
commit 8a5ac96a67
5 changed files with 62 additions and 8 deletions

View file

@ -466,6 +466,7 @@ CREATE_DUAL_EVENT_CALLBACK(frame, INTEL_DS_QUEUE_STAGE_FRAME)
CREATE_DUAL_EVENT_CALLBACK(batch, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
CREATE_DUAL_EVENT_CALLBACK(cmd_buffer, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
CREATE_DUAL_EVENT_CALLBACK(sba, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
CREATE_DUAL_EVENT_CALLBACK(btp, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
CREATE_DUAL_EVENT_CALLBACK(render_pass, INTEL_DS_QUEUE_STAGE_RENDER_PASS)
CREATE_DUAL_EVENT_CALLBACK(blorp, INTEL_DS_QUEUE_STAGE_BLORP)
CREATE_DUAL_EVENT_CALLBACK(draw, INTEL_DS_QUEUE_STAGE_DRAW)

View file

@ -131,6 +131,11 @@ def define_tracepoints(args):
tp_args=[Arg(type='uint8_t', var='mode', c_format='%hhu'),],
end_pipelined=False)
# 3DSTATE_BINDING_TABLE_POOL_ALLOC emission, only for Anv
begin_end_tp('btp',
tp_args=[Arg(type='uint64_t', var='addr', c_format='0x%" PRIx64 "'),],
end_pipelined=False)
# Dynamic rendering tracepoints, only for Anv
begin_end_tp('render_pass',
tp_args=[Arg(type='uint64_t', var='command_buffer_handle', c_format='%" PRIu64 "', perfetto_field=True),

View file

@ -636,6 +636,13 @@ anv_address_physical(struct anv_address addr)
return intel_canonical_address(address);
}
/* Tell whether two anv_address values refer to the same location.
 *
 * Both addresses are resolved through anv_address_physical() and the
 * resulting values are compared, so two addresses compare equal whenever
 * that helper yields the same value for both of them.
 */
static inline bool
anv_address_equals(struct anv_address addr1,
                   struct anv_address addr2)
{
   const uint64_t resolved1 = anv_address_physical(addr1);
   const uint64_t resolved2 = anv_address_physical(addr2);

   return resolved1 == resolved2;
}
static inline struct u_trace_address
anv_address_utrace(struct anv_address addr)
{
@ -4743,6 +4750,9 @@ struct anv_cmd_state {
uint64_t address[MAX_SETS];
} descriptor_buffers;
/* Last programmed 3DSTATE_BINDING_TABLE_POOL_ALLOC address */
struct anv_address btp;
/* For Gen 9, this allocation is 2 greater than the maximum allowed
* number of vertex buffers; see comment on get_max_vbs definition.
* Specializing this allocation seems needlessly complicated when we can

View file

@ -418,15 +418,32 @@ genX(cmd_buffer_emit_bt_pool_base_address)(struct anv_cmd_buffer *cmd_buffer)
if (!anv_cmd_buffer_is_render_or_compute_queue(cmd_buffer))
return;
/* If we are emitting a new state base address we probably need to re-emit
* binding tables.
*/
cmd_buffer->state.descriptors_dirty |= ~0;
#if GFX_VERx10 >= 125
struct anv_address btp = anv_cmd_buffer_surface_base_address(cmd_buffer);
if (anv_address_equals(cmd_buffer->state.btp, btp))
return;
struct anv_device *device = cmd_buffer->device;
const uint32_t mocs = isl_mocs(&device->isl_dev, 0, false);
trace_intel_begin_btp(cmd_buffer->batch.trace);
/* Disable stall tracing to avoid leaving a tracepoint with random
* timestamp if the 3DSTATE_BINDING_TABLE_POOL_ALLOC instruction sequence
* is skipped over.
*/
struct u_trace *tmp_trace = cmd_buffer->batch.trace;
cmd_buffer->batch.trace = NULL;
struct mi_builder b;
mi_builder_init(&b, device->info, &cmd_buffer->batch);
mi_builder_set_mocs(&b, isl_mocs(&device->isl_dev, 0, false));
struct mi_goto_target t = MI_GOTO_TARGET_INIT;
mi_goto_if(&b,
mi_ieq(&b, mi_reg64(ANV_BTP_ADDR_REG),
mi_imm(anv_address_physical(btp))),
&t);
/* We're changing base location of binding tables which affects the state
* cache. We're adding texture cache invalidation following a
* recommendation from the ICL PRMs, Volume 9: Render Engine, Coherency
@ -445,8 +462,7 @@ genX(cmd_buffer_emit_bt_pool_base_address)(struct anv_cmd_buffer *cmd_buffer)
"pre BINDING_TABLE_POOL_ALLOC stall");
anv_batch_emit(
&cmd_buffer->batch, GENX(3DSTATE_BINDING_TABLE_POOL_ALLOC), btpa) {
btpa.BindingTablePoolBaseAddress =
anv_cmd_buffer_surface_base_address(cmd_buffer);
btpa.BindingTablePoolBaseAddress = btp;
btpa.BindingTablePoolBufferSize = BINDING_TABLE_VIEW_SIZE / 4096;
btpa.MOCS = mocs;
}
@ -457,9 +473,24 @@ genX(cmd_buffer_emit_bt_pool_base_address)(struct anv_cmd_buffer *cmd_buffer)
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT,
"post BINDING_TABLE_POOL_ALLOC invalidate");
mi_store(&b, mi_reg64(ANV_BTP_ADDR_REG),
mi_imm(anv_address_physical(btp)));
mi_goto_target(&b, &t);
cmd_buffer->batch.trace = tmp_trace;
cmd_buffer->state.btp = btp;
trace_intel_end_btp(cmd_buffer->batch.trace, anv_address_physical(btp));
#else /* GFX_VERx10 < 125 */
genX(cmd_buffer_emit_state_base_address)(cmd_buffer);
#endif
/* If we are emitting a new state base address we probably need to re-emit
* binding tables.
*/
cmd_buffer->state.descriptors_dirty |= ~0;
}
static void

View file

@ -5,11 +5,12 @@
#pragma once
/* We reserve :
* - GPR 12 for 3DSTATE_BINDING_TABLE_POOL_ALLOC address
* - GPR 13 for STATE_BASE_ADDRESS bindless surface base address
* - GPR 14 for perf queries
* - GPR 15 for conditional rendering
*/
#define MI_BUILDER_NUM_ALLOC_GPRS 13
#define MI_BUILDER_NUM_ALLOC_GPRS 12
#ifndef MI_BUILDER_CAN_WRITE_BATCH
#define MI_BUILDER_CAN_WRITE_BATCH true
#endif
@ -38,3 +39,9 @@
* emissions if the address doesn't change.
*/
#define ANV_BINDLESS_SURFACE_BASE_ADDR_REG 0x2668 /* MI_ALU_REG13 */
/* We reserve this MI ALU register to hold the last programmed
* 3DSTATE_BINDING_TABLE_POOL_ALLOC address so that we can predicate
* 3DSTATE_BINDING_TABLE_POOL_ALLOC emissions if the address doesn't change.
*/
#define ANV_BTP_ADDR_REG 0x2660 /* MI_ALU_REG12 */