mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-28 12:50:34 +01:00
anv: predicate BTP emissions
The previous commit enabled different command buffers to program the same 3DSTATE_BINDING_TABLE_POOL_ALLOC instruction even though they allocated different chunks of binding tables. Now we can just predicate this programming and skip the stalling, flushing & invalidation. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39527>
This commit is contained in:
parent
725c2a39d5
commit
8a5ac96a67
5 changed files with 62 additions and 8 deletions
|
|
@ -466,6 +466,7 @@ CREATE_DUAL_EVENT_CALLBACK(frame, INTEL_DS_QUEUE_STAGE_FRAME)
|
|||
CREATE_DUAL_EVENT_CALLBACK(batch, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
|
||||
CREATE_DUAL_EVENT_CALLBACK(cmd_buffer, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
|
||||
CREATE_DUAL_EVENT_CALLBACK(sba, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
|
||||
CREATE_DUAL_EVENT_CALLBACK(btp, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
|
||||
CREATE_DUAL_EVENT_CALLBACK(render_pass, INTEL_DS_QUEUE_STAGE_RENDER_PASS)
|
||||
CREATE_DUAL_EVENT_CALLBACK(blorp, INTEL_DS_QUEUE_STAGE_BLORP)
|
||||
CREATE_DUAL_EVENT_CALLBACK(draw, INTEL_DS_QUEUE_STAGE_DRAW)
|
||||
|
|
|
|||
|
|
@ -131,6 +131,11 @@ def define_tracepoints(args):
|
|||
tp_args=[Arg(type='uint8_t', var='mode', c_format='%hhu'),],
|
||||
end_pipelined=False)
|
||||
|
||||
# 3DSTATE_BINDING_TABLE_POOL_ALLOC emission, only for Anv
|
||||
begin_end_tp('btp',
|
||||
tp_args=[Arg(type='uint64_t', var='addr', c_format='0x%" PRIx64 "'),],
|
||||
end_pipelined=False)
|
||||
|
||||
# Dynamic rendering tracepoints, only for Anv
|
||||
begin_end_tp('render_pass',
|
||||
tp_args=[Arg(type='uint64_t', var='command_buffer_handle', c_format='%" PRIu64 "', perfetto_field=True),
|
||||
|
|
|
|||
|
|
@ -636,6 +636,13 @@ anv_address_physical(struct anv_address addr)
|
|||
return intel_canonical_address(address);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
anv_address_equals(struct anv_address addr1,
|
||||
struct anv_address addr2)
|
||||
{
|
||||
return anv_address_physical(addr1) == anv_address_physical(addr2);
|
||||
}
|
||||
|
||||
static inline struct u_trace_address
|
||||
anv_address_utrace(struct anv_address addr)
|
||||
{
|
||||
|
|
@ -4743,6 +4750,9 @@ struct anv_cmd_state {
|
|||
uint64_t address[MAX_SETS];
|
||||
} descriptor_buffers;
|
||||
|
||||
/* Last programmed 3DSTATE_BINDING_TABLE_POOL_ALLOC address */
|
||||
struct anv_address btp;
|
||||
|
||||
/* For Gen 9, this allocation is 2 greater than the maximum allowed
|
||||
* number of vertex buffers; see comment on get_max_vbs definition.
|
||||
* Specializing this allocation seems needlessly complicated when we can
|
||||
|
|
|
|||
|
|
@ -418,15 +418,32 @@ genX(cmd_buffer_emit_bt_pool_base_address)(struct anv_cmd_buffer *cmd_buffer)
|
|||
if (!anv_cmd_buffer_is_render_or_compute_queue(cmd_buffer))
|
||||
return;
|
||||
|
||||
/* If we are emitting a new state base address we probably need to re-emit
|
||||
* binding tables.
|
||||
*/
|
||||
cmd_buffer->state.descriptors_dirty |= ~0;
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
struct anv_address btp = anv_cmd_buffer_surface_base_address(cmd_buffer);
|
||||
if (anv_address_equals(cmd_buffer->state.btp, btp))
|
||||
return;
|
||||
|
||||
struct anv_device *device = cmd_buffer->device;
|
||||
const uint32_t mocs = isl_mocs(&device->isl_dev, 0, false);
|
||||
|
||||
trace_intel_begin_btp(cmd_buffer->batch.trace);
|
||||
|
||||
/* Disable stall tracing to avoid leaving a tracepoint with random
|
||||
* timestamp if the 3DSTATE_BINDING_TABLE_POOL_ALLOC instruction sequence is skipped
|
||||
* over.
|
||||
*/
|
||||
struct u_trace *tmp_trace = cmd_buffer->batch.trace;
|
||||
cmd_buffer->batch.trace = NULL;
|
||||
|
||||
struct mi_builder b;
|
||||
mi_builder_init(&b, device->info, &cmd_buffer->batch);
|
||||
mi_builder_set_mocs(&b, isl_mocs(&device->isl_dev, 0, false));
|
||||
struct mi_goto_target t = MI_GOTO_TARGET_INIT;
|
||||
mi_goto_if(&b,
|
||||
mi_ieq(&b, mi_reg64(ANV_BTP_ADDR_REG),
|
||||
mi_imm(anv_address_physical(btp))),
|
||||
&t);
|
||||
|
||||
/* We're changing base location of binding tables which affects the state
|
||||
* cache. We're adding texture cache invalidation following a
|
||||
* recommendation from the ICL PRMs, Volume 9: Render Engine, Coherency
|
||||
|
|
@ -445,8 +462,7 @@ genX(cmd_buffer_emit_bt_pool_base_address)(struct anv_cmd_buffer *cmd_buffer)
|
|||
"pre BINDING_TABLE_POOL_ALLOC stall");
|
||||
anv_batch_emit(
|
||||
&cmd_buffer->batch, GENX(3DSTATE_BINDING_TABLE_POOL_ALLOC), btpa) {
|
||||
btpa.BindingTablePoolBaseAddress =
|
||||
anv_cmd_buffer_surface_base_address(cmd_buffer);
|
||||
btpa.BindingTablePoolBaseAddress = btp;
|
||||
btpa.BindingTablePoolBufferSize = BINDING_TABLE_VIEW_SIZE / 4096;
|
||||
btpa.MOCS = mocs;
|
||||
}
|
||||
|
|
@ -457,9 +473,24 @@ genX(cmd_buffer_emit_bt_pool_base_address)(struct anv_cmd_buffer *cmd_buffer)
|
|||
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT,
|
||||
"post BINDING_TABLE_POOL_ALLOC invalidate");
|
||||
|
||||
mi_store(&b, mi_reg64(ANV_BTP_ADDR_REG),
|
||||
mi_imm(anv_address_physical(btp)));
|
||||
|
||||
mi_goto_target(&b, &t);
|
||||
|
||||
cmd_buffer->batch.trace = tmp_trace;
|
||||
cmd_buffer->state.btp = btp;
|
||||
|
||||
trace_intel_end_btp(cmd_buffer->batch.trace, anv_address_physical(btp));
|
||||
|
||||
#else /* GFX_VERx10 < 125 */
|
||||
genX(cmd_buffer_emit_state_base_address)(cmd_buffer);
|
||||
#endif
|
||||
|
||||
/* If we are emitting a new state base address we probably need to re-emit
|
||||
* binding tables.
|
||||
*/
|
||||
cmd_buffer->state.descriptors_dirty |= ~0;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -5,11 +5,12 @@
|
|||
#pragma once
|
||||
|
||||
/* We reserve :
|
||||
* - GPR 12 for 3DSTATE_BINDING_TABLE_POOL_ALLOC address
|
||||
* - GPR 13 for STATE_BASE_ADDRESS bindless surface base address
|
||||
* - GPR 14 for perf queries
|
||||
* - GPR 15 for conditional rendering
|
||||
*/
|
||||
#define MI_BUILDER_NUM_ALLOC_GPRS 13
|
||||
#define MI_BUILDER_NUM_ALLOC_GPRS 12
|
||||
#ifndef MI_BUILDER_CAN_WRITE_BATCH
|
||||
#define MI_BUILDER_CAN_WRITE_BATCH true
|
||||
#endif
|
||||
|
|
@ -38,3 +39,9 @@
|
|||
* emissions if the address doesn't change.
|
||||
*/
|
||||
#define ANV_BINDLESS_SURFACE_BASE_ADDR_REG 0x2668 /* MI_ALU_REG13 */
|
||||
|
||||
/* We reserve this MI ALU register to hold the last programmed
|
||||
* 3DSTATE_BINDING_TABLE_POOL_ALLOC address so that we can predicate
|
||||
* 3DSTATE_BINDING_TABLE_POOL_ALLOC emissions if the address doesn't change.
|
||||
*/
|
||||
#define ANV_BTP_ADDR_REG 0x2660 /* MI_ALU_REG12 */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue