tu: Only emit preempt optimization ambles when active

This avoids unnecessarily emitting the switch back/away ambles when
they aren't actually used due to preemption optimization being
disabled. This alleviates unnecessary overhead when not running with
the mitigation for kernel drivers which support it.

Signed-off-by: Dhruv Mark Collins <mark@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40852>
This commit is contained in:
Dhruv Mark Collins 2026-04-07 20:18:36 +00:00 committed by Marge Bot
parent 18437c7a65
commit 46aac5abaf
3 changed files with 9 additions and 6 deletions

View file

@ -2182,11 +2182,11 @@ tu_autotune::emit_reset_rp_hash_draw_state(struct tu_cmd_buffer *cmd, struct tu_
tu_cs_emit_qw(cs, reset_rp_hash_draw_state.iova);
}
void
bool
tu_autotune::emit_preempt_latency_tracking_setup(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
if (!cmd->autotune_ctx.tracks_preempt_latency())
return;
return false;
tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 4);
tu_cs_emit_qw(cs, global_iova(cmd, max_preemption_latency));
@ -2206,6 +2206,8 @@ tu_autotune::emit_preempt_latency_tracking_setup(struct tu_cmd_buffer *cmd, stru
write_preempt_counters_to_iova(cs, true, true, global_iova(cmd, base_preemption_latency),
global_iova(cmd, base_always_count), global_iova(cmd, base_aon));
return true;
}
tu_autotune::rp_key_opt

View file

@ -357,7 +357,8 @@ struct tu_autotune {
void init_reset_rp_hash_draw_state();
void emit_reset_rp_hash_draw_state(struct tu_cmd_buffer *cmd, struct tu_cs *cs) const;
void emit_preempt_latency_tracking_setup(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
/* Returns whether preemption latency tracking is enabled for this CB. */
bool emit_preempt_latency_tracking_setup(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
/* Returns the RP hash only when preemption latency tracking is enabled. */
rp_key_opt emit_preempt_latency_tracking_rp_hash(struct tu_cmd_buffer *cmd);
};

View file

@ -2436,7 +2436,7 @@ tu_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
tu7_set_thread_br_patchpoint(cmd, cs, false);
}
dev->autotune->emit_preempt_latency_tracking_setup(cmd, cs);
bool track_preempt_latency = dev->autotune->emit_preempt_latency_tracking_setup(cmd, cs);
tu_cs_emit_pkt7(cs, CP_SET_AMBLE, 3);
tu_cs_emit_qw(cs, cmd->device->bin_preamble_entry.bo->iova +
@ -2462,7 +2462,7 @@ tu_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
(1u << TU_PREDICATE_VTX_STATS_NOT_RUNNING));
}
if (dev->switch_back_amble_entry.size > 0) {
if (dev->switch_back_amble_entry.size > 0 && track_preempt_latency) {
tu_cs_emit_pkt7(cs, CP_SET_AMBLE, 3);
tu_cs_emit_qw(cs, dev->switch_back_amble_entry.bo->iova + dev->switch_back_amble_entry.offset);
tu_cs_emit(cs, CP_SET_AMBLE_2_DWORDS(dev->switch_back_amble_entry.size / sizeof(uint32_t)) |
@ -2473,7 +2473,7 @@ tu_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
tu_cs_emit(cs, CP_SET_AMBLE_2_TYPE(PREAMBLE_AMBLE_TYPE));
}
if (dev->switch_away_amble_entry.size > 0) {
if (dev->switch_away_amble_entry.size > 0 && track_preempt_latency) {
tu_cs_emit_pkt7(cs, CP_SET_AMBLE, 3);
tu_cs_emit_qw(cs, dev->switch_away_amble_entry.bo->iova + dev->switch_away_amble_entry.offset);
tu_cs_emit(cs, CP_SET_AMBLE_2_DWORDS(dev->switch_away_amble_entry.size / sizeof(uint32_t)) |