radv: create the start/stop CS for SQTT dynamically

This seems much cleaner and will help for future work. Also, this
might help if we want to dynamically change some SQTT parameters
in the future.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9097>
This commit is contained in:
Samuel Pitoiset 2021-02-17 10:16:40 +01:00 committed by Marge Bot
parent 0f7379e308
commit 388bde7e82

View file

@ -393,103 +393,6 @@ radv_emit_wait_for_idle(struct radv_device *device,
RADV_CMD_FLAG_INV_L2, &sqtt_flush_bits, 0);
}
static void
radv_thread_trace_init_cs(struct radv_device *device)
{
struct radeon_winsys *ws = device->ws;
VkResult result;
/* Thread trace start CS. */
for (int family = 0; family < 2; ++family) {
device->thread_trace.start_cs[family] = ws->cs_create(ws, family);
if (!device->thread_trace.start_cs[family])
return;
switch (family) {
case RADV_QUEUE_GENERAL:
radeon_emit(device->thread_trace.start_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
radeon_emit(device->thread_trace.start_cs[family], CC0_UPDATE_LOAD_ENABLES(1));
radeon_emit(device->thread_trace.start_cs[family], CC1_UPDATE_SHADOW_ENABLES(1));
break;
case RADV_QUEUE_COMPUTE:
radeon_emit(device->thread_trace.start_cs[family], PKT3(PKT3_NOP, 0, 0));
radeon_emit(device->thread_trace.start_cs[family], 0);
break;
}
radv_cs_add_buffer(ws, device->thread_trace.start_cs[family],
device->thread_trace.bo);
/* Make sure to wait-for-idle before starting SQTT. */
radv_emit_wait_for_idle(device,
device->thread_trace.start_cs[family],
family);
/* Disable clock gating before starting SQTT. */
radv_emit_inhibit_clockgating(device,
device->thread_trace.start_cs[family],
true);
/* Enable SQG events that collects thread trace data. */
radv_emit_spi_config_cntl(device,
device->thread_trace.start_cs[family],
true);
radv_emit_thread_trace_start(device,
device->thread_trace.start_cs[family],
family);
result = ws->cs_finalize(device->thread_trace.start_cs[family]);
if (result != VK_SUCCESS)
return;
}
/* Thread trace stop CS. */
for (int family = 0; family < 2; ++family) {
device->thread_trace.stop_cs[family] = ws->cs_create(ws, family);
if (!device->thread_trace.stop_cs[family])
return;
switch (family) {
case RADV_QUEUE_GENERAL:
radeon_emit(device->thread_trace.stop_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
radeon_emit(device->thread_trace.stop_cs[family], CC0_UPDATE_LOAD_ENABLES(1));
radeon_emit(device->thread_trace.stop_cs[family], CC1_UPDATE_SHADOW_ENABLES(1));
break;
case RADV_QUEUE_COMPUTE:
radeon_emit(device->thread_trace.stop_cs[family], PKT3(PKT3_NOP, 0, 0));
radeon_emit(device->thread_trace.stop_cs[family], 0);
break;
}
radv_cs_add_buffer(ws, device->thread_trace.stop_cs[family],
device->thread_trace.bo);
/* Make sure to wait-for-idle before stopping SQTT. */
radv_emit_wait_for_idle(device,
device->thread_trace.stop_cs[family],
family);
radv_emit_thread_trace_stop(device,
device->thread_trace.stop_cs[family],
family);
/* Restore previous state by disabling SQG events. */
radv_emit_spi_config_cntl(device,
device->thread_trace.stop_cs[family],
false);
/* Restore previous state by re-enabling clock gating. */
radv_emit_inhibit_clockgating(device,
device->thread_trace.stop_cs[family],
false);
result = ws->cs_finalize(device->thread_trace.stop_cs[family]);
if (result != VK_SUCCESS)
return;
}
}
static bool
radv_thread_trace_init_bo(struct radv_device *device)
{
@ -530,7 +433,6 @@ radv_thread_trace_init(struct radv_device *device)
if (!radv_thread_trace_init_bo(device))
return false;
radv_thread_trace_init_cs(device);
return true;
}
@ -553,14 +455,18 @@ radv_thread_trace_finish(struct radv_device *device)
static bool
radv_thread_trace_resize_bo(struct radv_device *device, uint32_t expected_size)
{
struct radeon_winsys *ws = device->ws;
/* Destroy the previous thread trace BO. */
ws->buffer_destroy(ws, device->thread_trace.bo);
/* Resize the trace buffer BO by 150% of the expected size to be sure
* it will be enough.
*/
device->thread_trace.buffer_size = expected_size * 1.50;
/* Cleanup and re-initialize thread trace. */
radv_thread_trace_finish(device);
if (!radv_thread_trace_init(device))
/* Re-create the thread trace BO. */
if (!radv_thread_trace_init_bo(device))
return false;
fprintf(stderr, "The thread trace buffer has been resized to %d KB "
@ -572,16 +478,112 @@ radv_thread_trace_resize_bo(struct radv_device *device, uint32_t expected_size)
bool
radv_begin_thread_trace(struct radv_queue *queue)
{
struct radv_device *device = queue->device;
int family = queue->queue_family_index;
struct radeon_cmdbuf *cs = queue->device->thread_trace.start_cs[family];
struct radeon_winsys *ws = device->ws;
struct radeon_cmdbuf *cs;
VkResult result;
/* Destroy the previous start CS and create a new one. */
if (device->thread_trace.start_cs[family]) {
ws->cs_destroy(device->thread_trace.start_cs[family]);
device->thread_trace.start_cs[family] = NULL;
}
cs = ws->cs_create(ws, family);
if (!cs)
return false;
switch (family) {
case RADV_QUEUE_GENERAL:
radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
radeon_emit(cs, CC0_UPDATE_LOAD_ENABLES(1));
radeon_emit(cs, CC1_UPDATE_SHADOW_ENABLES(1));
break;
case RADV_QUEUE_COMPUTE:
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, 0);
break;
}
radv_cs_add_buffer(ws, cs, device->thread_trace.bo);
/* Make sure to wait-for-idle before starting SQTT. */
radv_emit_wait_for_idle(device, cs, family);
/* Disable clock gating before starting SQTT. */
radv_emit_inhibit_clockgating(device, cs, true);
/* Enable SQG events that collects thread trace data. */
radv_emit_spi_config_cntl(device, cs, true);
/* Start SQTT. */
radv_emit_thread_trace_start(device, cs, family);
result = ws->cs_finalize(cs);
if (result != VK_SUCCESS) {
ws->cs_destroy(cs);
return false;
}
device->thread_trace.start_cs[family] = cs;
return radv_queue_internal_submit(queue, cs);
}
bool
radv_end_thread_trace(struct radv_queue *queue)
{
struct radv_device *device = queue->device;
int family = queue->queue_family_index;
struct radeon_cmdbuf *cs = queue->device->thread_trace.stop_cs[family];
struct radeon_winsys *ws = device->ws;
struct radeon_cmdbuf *cs;
VkResult result;
/* Destroy the previous stop CS and create a new one. */
if (queue->device->thread_trace.stop_cs[family]) {
ws->cs_destroy(device->thread_trace.stop_cs[family]);
device->thread_trace.stop_cs[family] = NULL;
}
cs = ws->cs_create(ws, family);
if (!cs)
return false;
switch (family) {
case RADV_QUEUE_GENERAL:
radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
radeon_emit(cs, CC0_UPDATE_LOAD_ENABLES(1));
radeon_emit(cs, CC1_UPDATE_SHADOW_ENABLES(1));
break;
case RADV_QUEUE_COMPUTE:
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, 0);
break;
}
radv_cs_add_buffer(ws, cs, device->thread_trace.bo);
/* Make sure to wait-for-idle before stopping SQTT. */
radv_emit_wait_for_idle(device, cs, family);
/* Stop SQTT. */
radv_emit_thread_trace_stop(device, cs, family);
/* Restore previous state by disabling SQG events. */
radv_emit_spi_config_cntl(device, cs, false);
/* Restore previous state by re-enabling clock gating. */
radv_emit_inhibit_clockgating(device, cs, false);
result = ws->cs_finalize(cs);
if (result != VK_SUCCESS) {
ws->cs_destroy(cs);
return false;
}
device->thread_trace.stop_cs[family] = cs;
return radv_queue_internal_submit(queue, cs);
}