radv: use the SQTT enable bit for PKT3_DRAW_{INDEX}_INDIRECT_MULTI

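This reports more info in RGP (Radeon GPU Profiler): multi-draw packets now
set the thread trace marker enable bit themselves, and the separate thread
trace marker after the draw is only emitted when the multi packet is not used.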

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39425>
This commit is contained in:
Samuel Pitoiset 2026-01-21 11:57:26 +01:00 committed by Marge Bot
parent e5982496f6
commit c7da19e2bf
3 changed files with 35 additions and 13 deletions

View file

@ -93,6 +93,7 @@
#define PKT3_INDEX_TYPE 0x2A /* GFX6-8 */
#define PKT3_DRAW_INDIRECT_MULTI 0x2C
#define R_2C3_DRAW_INDEX_LOC 0x2C3
#define S_2C3_THREAD_TRACE_MARKER_ENABLE(x) (((unsigned)(x)&0x1) << 29)
#define S_2C3_COUNT_INDIRECT_ENABLE(x) (((unsigned)(x)&0x1) << 30)
#define S_2C3_DRAW_INDEX_ENABLE(x) (((unsigned)(x)&0x1) << 31)
#define PKT3_DRAW_INDEX_AUTO 0x2D

View file

@ -10448,7 +10448,7 @@ radv_cs_emit_draw_indexed_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t in
/* MUST inline this function to avoid massive perf loss in drawoverhead */
ALWAYS_INLINE static void
radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, bool indexed, uint32_t draw_count,
uint64_t count_va, uint32_t stride)
uint64_t count_va, uint32_t stride, bool use_multi)
{
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_cmd_stream *cs = cmd_buffer->cs;
@ -10456,6 +10456,7 @@ radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, bool index
bool draw_id_enable = cmd_buffer->state.uses_drawid;
uint32_t base_reg = cmd_buffer->state.vtx_base_sgpr;
uint32_t vertex_offset_reg, start_instance_reg = 0, draw_id_reg = 0;
const bool sqtt_en = !!device->sqtt.bo;
bool predicating = cmd_buffer->state.predicating;
assert(base_reg);
@ -10473,7 +10474,7 @@ radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, bool index
radeon_begin(cs);
if (draw_count == 1 && !count_va && !draw_id_enable) {
if (!use_multi) {
radeon_emit(PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT : PKT3_DRAW_INDIRECT, 3, predicating));
radeon_emit(0);
radeon_emit(vertex_offset_reg);
@ -10484,7 +10485,8 @@ radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, bool index
radeon_emit(0);
radeon_emit(vertex_offset_reg);
radeon_emit(start_instance_reg);
radeon_emit(draw_id_reg | S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) | S_2C3_COUNT_INDIRECT_ENABLE(!!count_va));
radeon_emit(draw_id_reg | S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) | S_2C3_COUNT_INDIRECT_ENABLE(!!count_va) |
S_2C3_THREAD_TRACE_MARKER_ENABLE(sqtt_en));
radeon_emit(draw_count); /* count */
radeon_emit(count_va); /* count_addr */
radeon_emit(count_va >> 32);
@ -11117,21 +11119,25 @@ radv_emit_indirect_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct
{
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_cmd_state *state = &cmd_buffer->state;
const bool draw_id_enable = cmd_buffer->state.uses_drawid;
const bool use_multi = info->count > 1 || info->count_va || draw_id_enable;
struct radv_cmd_stream *cs = cmd_buffer->cs;
radv_emit_indirect_buffer(cs, info->indirect_va, false);
if (!state->render.view_mask) {
radv_cs_emit_indirect_draw_packet(cmd_buffer, info->indexed, info->count, info->count_va, info->stride);
radv_cs_emit_indirect_draw_packet(cmd_buffer, info->indexed, info->count, info->count_va, info->stride,
use_multi);
} else {
u_foreach_bit (i, state->render.view_mask) {
radv_emit_view_index(&cmd_buffer->state, cs, i);
radv_cs_emit_indirect_draw_packet(cmd_buffer, info->indexed, info->count, info->count_va, info->stride);
radv_cs_emit_indirect_draw_packet(cmd_buffer, info->indexed, info->count, info->count_va, info->stride,
use_multi);
}
}
if (device->sqtt.bo)
if (device->sqtt.bo && !use_multi)
radv_emit_thread_trace_marker(device, cmd_buffer->cs, cmd_buffer->state.predicating);
}

View file

@ -1378,14 +1378,15 @@ dgc_emit_pkt3_set_base(struct dgc_cmdbuf *cs, nir_def *va)
}
static void
dgc_emit_pkt3_draw_indirect(struct dgc_cmdbuf *cs, bool indexed)
dgc_emit_pkt3_draw_indirect(struct dgc_cmdbuf *cs, nir_def *has_drawid, bool indexed)
{
const struct radv_device *device = cs->dev;
const unsigned di_src_sel = indexed ? V_0287F0_DI_SRC_SEL_DMA : V_0287F0_DI_SRC_SEL_AUTO_INDEX;
const bool sqtt_en = !!device->sqtt.bo;
nir_builder *b = cs->b;
nir_def *vtx_base_sgpr = load_param16(b, vtx_base_sgpr);
nir_def *has_drawid = nir_test_mask(b, vtx_base_sgpr, DGC_USES_DRAWID);
nir_def *has_baseinstance = nir_test_mask(b, vtx_base_sgpr, DGC_USES_BASEINSTANCE);
vtx_base_sgpr = nir_iand_imm(b, nir_u2u32(b, vtx_base_sgpr), 0x3FFF);
@ -1409,7 +1410,8 @@ dgc_emit_pkt3_draw_indirect(struct dgc_cmdbuf *cs, bool indexed)
dgc_cs_emit_imm(0);
dgc_cs_emit(vertex_offset_reg);
dgc_cs_emit(nir_bcsel(b, has_baseinstance, start_instance_reg, nir_imm_int(b, 0)));
dgc_cs_emit(nir_ior(b, draw_id_reg, nir_imm_int(b, S_2C3_DRAW_INDEX_ENABLE(1))));
dgc_cs_emit(nir_ior_imm(b, nir_ior(b, draw_id_reg, nir_imm_int(b, S_2C3_DRAW_INDEX_ENABLE(1))),
S_2C3_THREAD_TRACE_MARKER_ENABLE(sqtt_en)));
dgc_cs_emit_imm(1); /* draw count */
dgc_cs_emit_imm(0); /* count va low */
dgc_cs_emit_imm(0); /* count va high */
@ -1440,13 +1442,23 @@ dgc_emit_draw_indirect(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *seq
nir_def *va = nir_iadd_imm(b, stream_addr, layout->vk.draw_src_offset_B);
nir_def *vtx_base_sgpr = load_param16(b, vtx_base_sgpr);
nir_def *has_drawid = nir_test_mask(b, vtx_base_sgpr, DGC_USES_DRAWID);
dgc_emit_before_draw(cs, sequence_id, indexed ? ApiCmdDrawIndexedIndirect : ApiCmdDrawIndirect,
indexed ? EventCmdDrawIndexedIndirect : EventCmdDrawIndirect);
dgc_emit_pkt3_set_base(cs, va);
dgc_emit_pkt3_draw_indirect(cs, indexed);
dgc_emit_pkt3_draw_indirect(cs, has_drawid, indexed);
dgc_emit_after_draw(cs, indexed ? ApiCmdDrawIndexedIndirect : ApiCmdDrawIndirect);
dgc_gfx12_emit_hiz_wa(cs);
nir_if *if_not_multi = nir_push_if(b, nir_inot(b, has_drawid));
{
dgc_emit_sqtt_thread_trace_marker(cs);
}
nir_pop_if(b, if_not_multi);
dgc_emit_sqtt_end_api_marker(cs, indexed ? ApiCmdDrawIndexedIndirect : ApiCmdDrawIndirect);
}
static void
@ -1508,7 +1520,9 @@ dgc_emit_draw_indexed(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *sequ
static void
dgc_emit_draw_with_count(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *sequence_id, bool indexed)
{
const struct radv_device *device = cs->dev;
const struct radv_indirect_command_layout *layout = cs->layout;
const bool sqtt_en = !!device->sqtt.bo;
nir_builder *b = cs->b;
nir_def *vtx_base_sgpr = load_param16(b, vtx_base_sgpr);
@ -1541,7 +1555,7 @@ dgc_emit_draw_with_count(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *s
dgc_cs_emit_imm(0);
dgc_cs_emit(vertex_offset_reg);
dgc_cs_emit(start_instance_reg);
dgc_cs_emit(draw_id_reg);
dgc_cs_emit(nir_ior_imm(b, draw_id_reg, S_2C3_THREAD_TRACE_MARKER_ENABLE(sqtt_en)));
dgc_cs_emit(draw_count);
dgc_cs_emit_imm(0);
dgc_cs_emit_imm(0);
@ -1549,7 +1563,8 @@ dgc_emit_draw_with_count(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *s
dgc_cs_emit(di_src_sel);
dgc_cs_end();
dgc_emit_after_draw(cs, indexed ? ApiCmdDrawIndexedIndirectCount : ApiCmdDrawIndirectCount);
dgc_gfx12_emit_hiz_wa(cs);
dgc_emit_sqtt_end_api_marker(cs, indexed ? ApiCmdDrawIndexedIndirectCount : ApiCmdDrawIndirectCount);
}
/**