diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index 64c4406c44f..65182e55e14 100644
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -93,6 +93,7 @@
 #define PKT3_INDEX_TYPE 0x2A /* GFX6-8 */
 #define PKT3_DRAW_INDIRECT_MULTI 0x2C
 #define R_2C3_DRAW_INDEX_LOC 0x2C3
+#define S_2C3_THREAD_TRACE_MARKER_ENABLE(x) (((unsigned)(x)&0x1) << 29)
 #define S_2C3_COUNT_INDIRECT_ENABLE(x) (((unsigned)(x)&0x1) << 30)
 #define S_2C3_DRAW_INDEX_ENABLE(x) (((unsigned)(x)&0x1) << 31)
 #define PKT3_DRAW_INDEX_AUTO 0x2D
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index df7bcd1ef90..b9287125942 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -10448,7 +10448,7 @@ radv_cs_emit_draw_indexed_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t in
 /* MUST inline this function to avoid massive perf loss in drawoverhead */
 ALWAYS_INLINE static void
 radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, bool indexed, uint32_t draw_count,
-                                  uint64_t count_va, uint32_t stride)
+                                  uint64_t count_va, uint32_t stride, bool use_multi)
 {
    const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    struct radv_cmd_stream *cs = cmd_buffer->cs;
@@ -10456,6 +10456,7 @@ radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, bool index
    bool draw_id_enable = cmd_buffer->state.uses_drawid;
    uint32_t base_reg = cmd_buffer->state.vtx_base_sgpr;
    uint32_t vertex_offset_reg, start_instance_reg = 0, draw_id_reg = 0;
+   const bool sqtt_en = !!device->sqtt.bo;
    bool predicating = cmd_buffer->state.predicating;
 
    assert(base_reg);
@@ -10473,7 +10474,7 @@ radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, bool index
 
    radeon_begin(cs);
 
-   if (draw_count == 1 && !count_va && !draw_id_enable) {
+   if (!use_multi) {
       radeon_emit(PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT : PKT3_DRAW_INDIRECT, 3, predicating));
       radeon_emit(0);
       radeon_emit(vertex_offset_reg);
@@ -10484,7 +10485,8 @@ radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, bool index
       radeon_emit(0);
       radeon_emit(vertex_offset_reg);
       radeon_emit(start_instance_reg);
-      radeon_emit(draw_id_reg | S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) | S_2C3_COUNT_INDIRECT_ENABLE(!!count_va));
+      radeon_emit(draw_id_reg | S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) | S_2C3_COUNT_INDIRECT_ENABLE(!!count_va) |
+                  S_2C3_THREAD_TRACE_MARKER_ENABLE(sqtt_en));
       radeon_emit(draw_count); /* count */
       radeon_emit(count_va);   /* count_addr */
       radeon_emit(count_va >> 32);
@@ -11117,21 +11119,25 @@ radv_emit_indirect_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct
 {
    const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_cmd_state *state = &cmd_buffer->state;
+   const bool draw_id_enable = cmd_buffer->state.uses_drawid;
+   const bool use_multi = info->count > 1 || info->count_va || draw_id_enable;
    struct radv_cmd_stream *cs = cmd_buffer->cs;
 
    radv_emit_indirect_buffer(cs, info->indirect_va, false);
 
    if (!state->render.view_mask) {
-      radv_cs_emit_indirect_draw_packet(cmd_buffer, info->indexed, info->count, info->count_va, info->stride);
+      radv_cs_emit_indirect_draw_packet(cmd_buffer, info->indexed, info->count, info->count_va, info->stride,
+                                        use_multi);
    } else {
       u_foreach_bit (i, state->render.view_mask) {
          radv_emit_view_index(&cmd_buffer->state, cs, i);
 
-         radv_cs_emit_indirect_draw_packet(cmd_buffer, info->indexed, info->count, info->count_va, info->stride);
+         radv_cs_emit_indirect_draw_packet(cmd_buffer, info->indexed, info->count, info->count_va, info->stride,
+                                           use_multi);
       }
    }
 
-   if (device->sqtt.bo)
+   if (device->sqtt.bo && !use_multi)
       radv_emit_thread_trace_marker(device, cmd_buffer->cs, cmd_buffer->state.predicating);
 }
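Note (not part of the patch): a minimal standalone C sketch of how the R_2C3_DRAW_INDEX_LOC control dword of DRAW_INDIRECT_MULTI is packed with the new bit-29 field. The field macros are copied from the sid.h hunk above; pack_draw_index_loc() and the values passed to it are hypothetical stand-ins for the locals used in radv_cs_emit_indirect_draw_packet(). With the bit set, the MULTI packet itself carries the thread-trace marker, which is why the separate radv_emit_thread_trace_marker() call above is now limited to the non-MULTI path.

/* Illustrative sketch, not RADV code. Builds with any C99 compiler. */
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define S_2C3_THREAD_TRACE_MARKER_ENABLE(x) (((unsigned)(x)&0x1) << 29)
#define S_2C3_COUNT_INDIRECT_ENABLE(x)      (((unsigned)(x)&0x1) << 30)
#define S_2C3_DRAW_INDEX_ENABLE(x)          (((unsigned)(x)&0x1) << 31)

/* Hypothetical helper mirroring the dword built in radv_cs_emit_indirect_draw_packet(). */
static uint32_t
pack_draw_index_loc(uint32_t draw_id_reg, bool draw_id_enable, uint64_t count_va, bool sqtt_en)
{
   return draw_id_reg | S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) |
          S_2C3_COUNT_INDIRECT_ENABLE(!!count_va) | S_2C3_THREAD_TRACE_MARKER_ENABLE(sqtt_en);
}

int
main(void)
{
   /* With SQTT active, bit 29 is set so the packet carries the draw marker itself. */
   printf("0x%08" PRIx32 "\n", pack_draw_index_loc(0x10, true, 0, true));
   return 0;
}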
diff --git a/src/amd/vulkan/radv_dgc.c b/src/amd/vulkan/radv_dgc.c
index 67b80dbec62..043002a3089 100644
--- a/src/amd/vulkan/radv_dgc.c
+++ b/src/amd/vulkan/radv_dgc.c
@@ -1378,14 +1378,15 @@ dgc_emit_pkt3_set_base(struct dgc_cmdbuf *cs, nir_def *va)
 }
 
 static void
-dgc_emit_pkt3_draw_indirect(struct dgc_cmdbuf *cs, bool indexed)
+dgc_emit_pkt3_draw_indirect(struct dgc_cmdbuf *cs, nir_def *has_drawid, bool indexed)
 {
+   const struct radv_device *device = cs->dev;
    const unsigned di_src_sel = indexed ? V_0287F0_DI_SRC_SEL_DMA : V_0287F0_DI_SRC_SEL_AUTO_INDEX;
+   const bool sqtt_en = !!device->sqtt.bo;
    nir_builder *b = cs->b;
 
    nir_def *vtx_base_sgpr = load_param16(b, vtx_base_sgpr);
-   nir_def *has_drawid = nir_test_mask(b, vtx_base_sgpr, DGC_USES_DRAWID);
    nir_def *has_baseinstance = nir_test_mask(b, vtx_base_sgpr, DGC_USES_BASEINSTANCE);
 
    vtx_base_sgpr = nir_iand_imm(b, nir_u2u32(b, vtx_base_sgpr), 0x3FFF);
@@ -1409,7 +1410,8 @@ dgc_emit_pkt3_draw_indirect(struct dgc_cmdbuf *cs, bool indexed)
    dgc_cs_emit_imm(0);
    dgc_cs_emit(vertex_offset_reg);
    dgc_cs_emit(nir_bcsel(b, has_baseinstance, start_instance_reg, nir_imm_int(b, 0)));
-   dgc_cs_emit(nir_ior(b, draw_id_reg, nir_imm_int(b, S_2C3_DRAW_INDEX_ENABLE(1))));
+   dgc_cs_emit(nir_ior_imm(b, nir_ior(b, draw_id_reg, nir_imm_int(b, S_2C3_DRAW_INDEX_ENABLE(1))),
+                           S_2C3_THREAD_TRACE_MARKER_ENABLE(sqtt_en)));
    dgc_cs_emit_imm(1); /* draw count */
    dgc_cs_emit_imm(0); /* count va low */
    dgc_cs_emit_imm(0); /* count va high */
@@ -1440,13 +1442,23 @@ dgc_emit_draw_indirect(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *seq
    nir_def *va = nir_iadd_imm(b, stream_addr, layout->vk.draw_src_offset_B);
 
+   nir_def *vtx_base_sgpr = load_param16(b, vtx_base_sgpr);
+   nir_def *has_drawid = nir_test_mask(b, vtx_base_sgpr, DGC_USES_DRAWID);
+
    dgc_emit_before_draw(cs, sequence_id, indexed ? ApiCmdDrawIndexedIndirect : ApiCmdDrawIndirect,
                         indexed ? EventCmdDrawIndexedIndirect : EventCmdDrawIndirect);
 
    dgc_emit_pkt3_set_base(cs, va);
 
-   dgc_emit_pkt3_draw_indirect(cs, indexed);
+   dgc_emit_pkt3_draw_indirect(cs, has_drawid, indexed);
 
-   dgc_emit_after_draw(cs, indexed ? ApiCmdDrawIndexedIndirect : ApiCmdDrawIndirect);
+   dgc_gfx12_emit_hiz_wa(cs);
+   nir_if *if_not_multi = nir_push_if(b, nir_inot(b, has_drawid));
+   {
+      dgc_emit_sqtt_thread_trace_marker(cs);
+   }
+   nir_pop_if(b, if_not_multi);
+
+   dgc_emit_sqtt_end_api_marker(cs, indexed ? ApiCmdDrawIndexedIndirect : ApiCmdDrawIndirect);
 }
 
 static void
@@ -1508,7 +1520,9 @@ dgc_emit_draw_indexed(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *sequ
 static void
 dgc_emit_draw_with_count(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *sequence_id, bool indexed)
 {
+   const struct radv_device *device = cs->dev;
    const struct radv_indirect_command_layout *layout = cs->layout;
+   const bool sqtt_en = !!device->sqtt.bo;
    nir_builder *b = cs->b;
 
    nir_def *vtx_base_sgpr = load_param16(b, vtx_base_sgpr);
@@ -1541,7 +1555,7 @@ dgc_emit_draw_with_count(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *s
    dgc_cs_emit_imm(0);
    dgc_cs_emit(vertex_offset_reg);
    dgc_cs_emit(start_instance_reg);
-   dgc_cs_emit(draw_id_reg);
+   dgc_cs_emit(nir_ior_imm(b, draw_id_reg, S_2C3_THREAD_TRACE_MARKER_ENABLE(sqtt_en)));
    dgc_cs_emit(draw_count);
    dgc_cs_emit_imm(0);
    dgc_cs_emit_imm(0);
@@ -1549,7 +1563,8 @@ dgc_emit_draw_with_count(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *s
    dgc_cs_emit(di_src_sel);
    dgc_cs_end();
 
-   dgc_emit_after_draw(cs, indexed ? ApiCmdDrawIndexedIndirectCount : ApiCmdDrawIndirectCount);
+   dgc_gfx12_emit_hiz_wa(cs);
+   dgc_emit_sqtt_end_api_marker(cs, indexed ? ApiCmdDrawIndexedIndirectCount : ApiCmdDrawIndirectCount);
 }
 
 /**
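Note (not part of the patch): a small standalone sketch of the packet-selection rule the radv_cmd_buffer.c hunks introduce, under the assumption stated there: DRAW_INDIRECT_MULTI is used whenever more than one draw, a count buffer, or draw IDs are needed, and only the remaining plain DRAW_INDIRECT path still gets the separate thread-trace marker. use_multi() and needs_separate_sqtt_marker() are illustrative helpers, not driver functions.

/* Illustrative sketch, not RADV code. Builds with any C99 compiler. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct draw_info {
   uint32_t count;    /* indirect draw count */
   uint64_t count_va; /* GPU address of the count buffer, 0 if none */
};

/* Mirrors the use_multi condition added to radv_emit_indirect_draw_packets(). */
static bool
use_multi(const struct draw_info *info, bool draw_id_enable)
{
   return info->count > 1 || info->count_va || draw_id_enable;
}

/* When the MULTI packet is used, the marker rides on bit 29 of the packet itself;
 * only the plain DRAW_INDIRECT path still needs a separate thread-trace marker. */
static bool
needs_separate_sqtt_marker(const struct draw_info *info, bool draw_id_enable, bool sqtt_bo)
{
   return sqtt_bo && !use_multi(info, draw_id_enable);
}

int
main(void)
{
   const struct draw_info single = {.count = 1, .count_va = 0};
   const struct draw_info counted = {.count = 4, .count_va = 0x100000};

   printf("single draw, no draw ID: multi=%d separate_marker=%d\n",
          use_multi(&single, false), needs_separate_sqtt_marker(&single, false, true));
   printf("count buffer present:    multi=%d separate_marker=%d\n",
          use_multi(&counted, false), needs_separate_sqtt_marker(&counted, false, true));
   return 0;
}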